diff mbox

[01/11] trivial: Cleanup of 'parser'

Message ID 1468402860-3409-2-git-send-email-stephen.finucane@intel.com
State Superseded
Headers show

Commit Message

Stephen Finucane July 13, 2016, 9:40 a.m. UTC
Do some cleanup of the file by removing excess whitespace, adding some
documentation, removing shadowing of keywords and renaming some
functions to more accurately reflect their purpose.

Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
---
 patchwork/bin/parsemail.py |    4 +-
 patchwork/models.py        |    4 +-
 patchwork/parser.py        |   74 ++++++++++++++++++++-----------------------
 3 files changed, 38 insertions(+), 44 deletions(-)

Comments

Andy Doan July 19, 2016, 9:32 p.m. UTC | #1
On 07/13/2016 04:40 AM, Stephen Finucane wrote:
> Do some cleanup of the file by removing excess whitespace, adding some
> documentation, removing shadowing of keywords and renaming some
> functions to more accurately reflect their purpose.
>
> Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>

Reviewed-by: Andy Doan <andy.doan@linaro.org>

> ---
>  patchwork/bin/parsemail.py |    4 +-
>  patchwork/models.py        |    4 +-
>  patchwork/parser.py        |   74 ++++++++++++++++++++-----------------------
>  3 files changed, 38 insertions(+), 44 deletions(-)
>
> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> index 8648d29..56cd126 100755
> --- a/patchwork/bin/parsemail.py
> +++ b/patchwork/bin/parsemail.py
> @@ -44,7 +44,7 @@ from django.utils.six.moves import map
>  from patchwork.models import (Patch, Project, Person, Comment, State,
>                                DelegationRule, Submission, CoverLetter,
>                                get_default_initial_patch_state)
> -from patchwork.parser import parse_patch, patch_get_filenames
> +from patchwork.parser import parse_patch, find_filenames
>
>  LOGGER = logging.getLogger(__name__)
>
> @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None):
>
>          delegate = find_delegate(mail)
>          if not delegate and diff:
> -            filenames = patch_get_filenames(diff)
> +            filenames = find_filenames(diff)
>              delegate = auto_delegate(project, filenames)
>
>          patch = Patch(
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 521b20c..ee2ee63 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -35,7 +35,7 @@ from django.utils.functional import cached_property
>  from django.utils.six.moves import filter
>
>  from patchwork.fields import HashField
> -from patchwork.parser import extract_tags, hash_patch
> +from patchwork.parser import extract_tags, hash_diff
>
>
>  @python_2_unicode_compatible
> @@ -364,7 +364,7 @@ class Patch(Submission):
>              self.state = get_default_initial_patch_state()
>
>          if self.hash is None and self.diff is not None:
> -            self.hash = hash_patch(self.diff).hexdigest()
> +            self.hash = hash_diff(self.diff).hexdigest()
>
>          super(Patch, self).save(**kwargs)
>
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 8bf9b21..f173431 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
>  _filename_re = re.compile('^(---|\+\+\+) (\S+)')
>
>
> -def parse_patch(text):
> +def parse_patch(content):
> +    """Split a mail's contents into a diff and comment.
> +
> +    This is a state machine that takes a patch, generally in UNIX mbox
> +    format, and splits it into the component comments and diff.
> +
> +    Args:
> +        patch: The patch to be split
> +
> +    Returns:
> +        A tuple containing the diff and comment. Either one or both of
> +        these can be empty.
> +
> +    Raises:
> +        Exception: The state machine transitioned to an invalid state.
> +    """
>      patchbuf = ''
>      commentbuf = ''
>      buf = ''
> @@ -68,7 +83,7 @@ def parse_patch(text):
>      lc = (0, 0)
>      hunk = 0
>
> -    for line in text.split('\n'):
> +    for line in content.split('\n'):
>          line += '\n'
>
>          if state == 0:
> @@ -76,14 +91,11 @@ def parse_patch(text):
>                      or line.startswith('Index: '):
>                  state = 1
>                  buf += line
> -
>              elif line.startswith('--- '):
>                  state = 2
>                  buf += line
> -
>              else:
>                  commentbuf += line
> -
>          elif state == 1:
>              buf += line
>              if line.startswith('--- '):
> @@ -91,25 +103,20 @@ def parse_patch(text):
>
>              if line.startswith(('rename from ', 'rename to ')):
>                  state = 6
> -
>          elif state == 2:
>              if line.startswith('+++ '):
>                  state = 3
>                  buf += line
> -
>              elif hunk:
>                  state = 1
>                  buf += line
> -
>              else:
>                  state = 0
>                  commentbuf += buf + line
>                  buf = ''
> -
>          elif state == 3:
>              match = _hunk_re.match(line)
>              if match:
> -
>                  def fn(x):
>                      if not x:
>                          return 1
> @@ -120,26 +127,21 @@ def parse_patch(text):
>                  state = 4
>                  patchbuf += buf + line
>                  buf = ''
> -
>              elif line.startswith('--- '):
>                  patchbuf += buf + line
>                  buf = ''
>                  state = 2
> -
>              elif hunk and line.startswith('\ No newline at end of file'):
>                  # If we had a hunk and now we see this, it's part of the patch,
>                  # and we're still expecting another @@ line.
>                  patchbuf += line
> -
>              elif hunk:
>                  state = 1
>                  buf += line
> -
>              else:
>                  state = 0
>                  commentbuf += buf + line
>                  buf = ''
> -
>          elif state == 4 or state == 5:
>              if line.startswith('-'):
>                  lc[0] -= 1
> @@ -159,21 +161,17 @@ def parse_patch(text):
>                  hunk += 1
>              else:
>                  state = 5
> -
>          elif state == 6:
>              if line.startswith(('rename to ', 'rename from ')):
>                  patchbuf += buf + line
>                  buf = ''
> -
>              elif line.startswith('--- '):
>                  patchbuf += buf + line
>                  buf = ''
>                  state = 2
> -
>              else:
>                  buf += line
>                  state = 1
> -
>          else:
>              raise Exception("Unknown state %d! (line '%s')" % (state, line))
>
> @@ -185,19 +183,19 @@ def parse_patch(text):
>      if commentbuf == '':
>          commentbuf = None
>
> -    return (patchbuf, commentbuf)
> +    return patchbuf, commentbuf
>
>
> -def hash_patch(str):
> +def hash_diff(diff):
> +    """Generate a hash from a diff."""
>      # normalise spaces
> -    str = str.replace('\r', '')
> -    str = str.strip() + '\n'
> +    diff = diff.replace('\r', '')
> +    diff = diff.strip() + '\n'
>
>      prefixes = ['-', '+', ' ']
>      hash = hashlib.sha1()
>
> -    for line in str.split('\n'):
> -
> +    for line in diff.split('\n'):
>          if len(line) <= 0:
>              continue
>
> @@ -213,7 +211,6 @@ def hash_patch(str):
>              filename += '/'.join(filename_match.group(2).split('/')[1:])
>
>              line = filename_match.group(1) + ' ' + filename
> -
>          elif hunk_match:
>              # remove line numbers, but leave line counts
>              def fn(x):
> @@ -222,11 +219,9 @@ def hash_patch(str):
>                  return int(x)
>              line_nos = list(map(fn, hunk_match.groups()))
>              line = '@@ -%d +%d @@' % tuple(line_nos)
> -
>          elif line[0] in prefixes:
>              # if we have a +, - or context line, leave as-is
>              pass
> -
>          else:
>              # other lines are ignored
>              continue
> @@ -246,15 +241,15 @@ def extract_tags(content, tags):
>      return counts
>
>
> -def patch_get_filenames(str):
> +def find_filenames(diff):
> +    """Find files changes in a given diff."""
>      # normalise spaces
> -    str = str.replace('\r', '')
> -    str = str.strip() + '\n'
> +    diff = diff.replace('\r', '')
> +    diff = diff.strip() + '\n'
>
>      filenames = {}
>
> -    for line in str.split('\n'):
> -
> +    for line in diff.split('\n'):
>          if len(line) <= 0:
>              continue
>
> @@ -291,21 +286,20 @@ def main(args):
>
>      # decode from (assumed) UTF-8
>      content = sys.stdin.read().decode('utf-8')
> -
> -    (patch, comment) = parse_patch(content)
> +    patch, comment = parse_patch(content)
>
>      if options.print_hash and patch:
> -        print(hash_patch(patch).hexdigest())
> +        print(hash_diff(patch).hexdigest())
>
>      if options.print_patch and patch:
> -        print("Patch: ------\n" + patch)
> +        print('Patch: ------\n' + patch)
>
>      if options.print_comment and comment:
> -        print("Comment: ----\n" + comment)
> +        print('Comment: ----\n' + comment)
>
>      if options.print_filenames:
> -        filenames = patch_get_filenames(content)
> -        print("File names: ----\n" + '\n'.join(filenames))
> +        filenames = find_filenames(content)
> +        print('File names: ----\n' + '\n'.join(filenames))
>
>  if __name__ == '__main__':
>      import sys
>
diff mbox

Patch

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 8648d29..56cd126 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -44,7 +44,7 @@  from django.utils.six.moves import map
 from patchwork.models import (Patch, Project, Person, Comment, State,
                               DelegationRule, Submission, CoverLetter,
                               get_default_initial_patch_state)
-from patchwork.parser import parse_patch, patch_get_filenames
+from patchwork.parser import parse_patch, find_filenames
 
 LOGGER = logging.getLogger(__name__)
 
@@ -494,7 +494,7 @@  def parse_mail(mail, list_id=None):
 
         delegate = find_delegate(mail)
         if not delegate and diff:
-            filenames = patch_get_filenames(diff)
+            filenames = find_filenames(diff)
             delegate = auto_delegate(project, filenames)
 
         patch = Patch(
diff --git a/patchwork/models.py b/patchwork/models.py
index 521b20c..ee2ee63 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -35,7 +35,7 @@  from django.utils.functional import cached_property
 from django.utils.six.moves import filter
 
 from patchwork.fields import HashField
-from patchwork.parser import extract_tags, hash_patch
+from patchwork.parser import extract_tags, hash_diff
 
 
 @python_2_unicode_compatible
@@ -364,7 +364,7 @@  class Patch(Submission):
             self.state = get_default_initial_patch_state()
 
         if self.hash is None and self.diff is not None:
-            self.hash = hash_patch(self.diff).hexdigest()
+            self.hash = hash_diff(self.diff).hexdigest()
 
         super(Patch, self).save(**kwargs)
 
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 8bf9b21..f173431 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -33,7 +33,22 @@  _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
 _filename_re = re.compile('^(---|\+\+\+) (\S+)')
 
 
-def parse_patch(text):
+def parse_patch(content):
+    """Split a mail's contents into a diff and comment.
+
+    This is a state machine that takes a patch, generally in UNIX mbox
+    format, and splits it into the component comments and diff.
+
+    Args:
+        patch: The patch to be split
+
+    Returns:
+        A tuple containing the diff and comment. Either one or both of
+        these can be empty.
+
+    Raises:
+        Exception: The state machine transitioned to an invalid state.
+    """
     patchbuf = ''
     commentbuf = ''
     buf = ''
@@ -68,7 +83,7 @@  def parse_patch(text):
     lc = (0, 0)
     hunk = 0
 
-    for line in text.split('\n'):
+    for line in content.split('\n'):
         line += '\n'
 
         if state == 0:
@@ -76,14 +91,11 @@  def parse_patch(text):
                     or line.startswith('Index: '):
                 state = 1
                 buf += line
-
             elif line.startswith('--- '):
                 state = 2
                 buf += line
-
             else:
                 commentbuf += line
-
         elif state == 1:
             buf += line
             if line.startswith('--- '):
@@ -91,25 +103,20 @@  def parse_patch(text):
 
             if line.startswith(('rename from ', 'rename to ')):
                 state = 6
-
         elif state == 2:
             if line.startswith('+++ '):
                 state = 3
                 buf += line
-
             elif hunk:
                 state = 1
                 buf += line
-
             else:
                 state = 0
                 commentbuf += buf + line
                 buf = ''
-
         elif state == 3:
             match = _hunk_re.match(line)
             if match:
-
                 def fn(x):
                     if not x:
                         return 1
@@ -120,26 +127,21 @@  def parse_patch(text):
                 state = 4
                 patchbuf += buf + line
                 buf = ''
-
             elif line.startswith('--- '):
                 patchbuf += buf + line
                 buf = ''
                 state = 2
-
             elif hunk and line.startswith('\ No newline at end of file'):
                 # If we had a hunk and now we see this, it's part of the patch,
                 # and we're still expecting another @@ line.
                 patchbuf += line
-
             elif hunk:
                 state = 1
                 buf += line
-
             else:
                 state = 0
                 commentbuf += buf + line
                 buf = ''
-
         elif state == 4 or state == 5:
             if line.startswith('-'):
                 lc[0] -= 1
@@ -159,21 +161,17 @@  def parse_patch(text):
                 hunk += 1
             else:
                 state = 5
-
         elif state == 6:
             if line.startswith(('rename to ', 'rename from ')):
                 patchbuf += buf + line
                 buf = ''
-
             elif line.startswith('--- '):
                 patchbuf += buf + line
                 buf = ''
                 state = 2
-
             else:
                 buf += line
                 state = 1
-
         else:
             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 
@@ -185,19 +183,19 @@  def parse_patch(text):
     if commentbuf == '':
         commentbuf = None
 
-    return (patchbuf, commentbuf)
+    return patchbuf, commentbuf
 
 
-def hash_patch(str):
+def hash_diff(diff):
+    """Generate a hash from a diff."""
     # normalise spaces
-    str = str.replace('\r', '')
-    str = str.strip() + '\n'
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
 
     prefixes = ['-', '+', ' ']
     hash = hashlib.sha1()
 
-    for line in str.split('\n'):
-
+    for line in diff.split('\n'):
         if len(line) <= 0:
             continue
 
@@ -213,7 +211,6 @@  def hash_patch(str):
             filename += '/'.join(filename_match.group(2).split('/')[1:])
 
             line = filename_match.group(1) + ' ' + filename
-
         elif hunk_match:
             # remove line numbers, but leave line counts
             def fn(x):
@@ -222,11 +219,9 @@  def hash_patch(str):
                 return int(x)
             line_nos = list(map(fn, hunk_match.groups()))
             line = '@@ -%d +%d @@' % tuple(line_nos)
-
         elif line[0] in prefixes:
             # if we have a +, - or context line, leave as-is
             pass
-
         else:
             # other lines are ignored
             continue
@@ -246,15 +241,15 @@  def extract_tags(content, tags):
     return counts
 
 
-def patch_get_filenames(str):
+def find_filenames(diff):
+    """Find files changes in a given diff."""
     # normalise spaces
-    str = str.replace('\r', '')
-    str = str.strip() + '\n'
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
 
     filenames = {}
 
-    for line in str.split('\n'):
-
+    for line in diff.split('\n'):
         if len(line) <= 0:
             continue
 
@@ -291,21 +286,20 @@  def main(args):
 
     # decode from (assumed) UTF-8
     content = sys.stdin.read().decode('utf-8')
-
-    (patch, comment) = parse_patch(content)
+    patch, comment = parse_patch(content)
 
     if options.print_hash and patch:
-        print(hash_patch(patch).hexdigest())
+        print(hash_diff(patch).hexdigest())
 
     if options.print_patch and patch:
-        print("Patch: ------\n" + patch)
+        print('Patch: ------\n' + patch)
 
     if options.print_comment and comment:
-        print("Comment: ----\n" + comment)
+        print('Comment: ----\n' + comment)
 
     if options.print_filenames:
-        filenames = patch_get_filenames(content)
-        print("File names: ----\n" + '\n'.join(filenames))
+        filenames = find_filenames(content)
+        print('File names: ----\n' + '\n'.join(filenames))
 
 if __name__ == '__main__':
     import sys