diff mbox series

parsemail: Clarify exit codes

Message ID 20180430154459.27697-1-dja@axtens.net
State Accepted
Headers show
Series parsemail: Clarify exit codes | expand

Commit Message

Daniel Axtens April 30, 2018, 3:44 p.m. UTC
jk reports that the patchwork error codes are really unhelpful for
correct integration with an MDA. In particular they make sorting out
failures into a separate queue very difficult. Make this better and
clearer: only return 1 on a genuinely unexpected case that requires
adminstrator intervention.

Update the comment for parse_mail regarding return values and exceptions
to line up with how the function actually works and how we use the
results.

Update the tests. None of the existing tests should exit 1; they're
all 'expected' failures: unknown project etc. Also we removed the
exit(0) from the success path, so stop expecting that exception to
be raised.

Add a test for duplicates. That should also succeed without raising
an exception: dups are part of life.

Update parsearchive to deal with the fact that we can no longer
differentiate duplicates.

Reported-by: Jeremy Kerr <jk@ozlabs.org>
Fixes: #171
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
 patchwork/management/commands/parsearchive.py |  8 +----
 patchwork/management/commands/parsemail.py    | 16 ++++++---
 patchwork/parser.py                           | 42 ++++++++++++++--------
 patchwork/tests/test_management.py            | 50 +++++++++++++++------------
 4 files changed, 68 insertions(+), 48 deletions(-)

Comments

Stephen Finucane May 3, 2018, 2:07 p.m. UTC | #1
On Tue, 2018-05-01 at 01:44 +1000, Daniel Axtens wrote:
> jk reports that the patchwork error codes are really unhelpful for
> correct integration with an MDA. In particular they make sorting out
> failures into a separate queue very difficult. Make this better and
> clearer: only return 1 on a genuinely unexpected case that requires
> adminstrator intervention.
> 
> Update the comment for parse_mail regarding return values and exceptions
> to line up with how the function actually works and how we use the
> results.
> 
> Update the tests. None of the existing tests should exit 1; they're
> all 'expected' failures: unknown project etc. Also we removed the
> exit(0) from the success path, so stop expecting that exception to
> be raised.
> 
> Add a test for duplicates. That should also succeed without raising
> an exception: dups are part of life.
> 
> Update parsearchive to deal with the fact that we can no longer
> differentiate duplicates.
> 
> Reported-by: Jeremy Kerr <jk@ozlabs.org>
> Fixes: #171
> Signed-off-by: Daniel Axtens <dja@axtens.net>

This sounds fine to me and all the changes make sense.

Reviewed-by: Stephen Finucane <stephen@that.guru>

...and applied.
diff mbox series

Patch

diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
index 16ca80f61965..5468d35ee330 100644
--- a/patchwork/management/commands/parsearchive.py
+++ b/patchwork/management/commands/parsearchive.py
@@ -22,7 +22,6 @@  import mailbox
 import os
 import sys
 
-import django
 from django.core.management.base import BaseCommand
 
 from patchwork import models
@@ -49,7 +48,6 @@  class Command(BaseCommand):
             models.CoverLetter: 0,
             models.Comment: 0,
         }
-        duplicates = 0
         dropped = 0
         errors = 0
 
@@ -92,8 +90,6 @@  class Command(BaseCommand):
                     results[type(obj)] += 1
                 else:
                     dropped += 1
-            except django.db.utils.IntegrityError:
-                duplicates += 1
             except ValueError:
                 # TODO(stephenfin): Perhaps we should store the broken patch
                 # somewhere for future reference?
@@ -108,7 +104,6 @@  class Command(BaseCommand):
             '  %(covers)4d cover letters\n'
             '  %(patches)4d patches\n'
             '  %(comments)4d comments\n'
-            '  %(duplicates)4d duplicates\n'
             '  %(dropped)4d dropped\n'
             '  %(errors)4d errors\n'
             'Total: %(new)s new entries' % {
@@ -116,9 +111,8 @@  class Command(BaseCommand):
                 'covers': results[models.CoverLetter],
                 'patches': results[models.Patch],
                 'comments': results[models.Comment],
-                'duplicates': duplicates,
                 'dropped': dropped,
                 'errors': errors,
-                'new': count - duplicates - dropped - errors,
+                'new': count - dropped - errors,
             })
         mbox.close()
diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py
index 6d9825f737f2..f62fb4f51aa1 100644
--- a/patchwork/management/commands/parsemail.py
+++ b/patchwork/management/commands/parsemail.py
@@ -66,12 +66,20 @@  class Command(base.BaseCommand):
             logger.warning("Broken email ignored")
             return
 
+        # it's important to get exit codes correct here. The key is to allow
+        # proper separation of real errors vs expected 'failures'.
+        #
+        # patch/comment parsed:        0
+        # no parseable content found:  0
+        # duplicate messages:          0
+        # db integrity/other db error: 1
+        # broken email (ValueError):   1 (this could be noisy, if it's an issue
+        #                                 we could use a different return code)
         try:
             result = parse_mail(mail, options['list_id'])
-            if result:
-                sys.exit(0)
-            logger.warning('Failed to parse mail')
-            sys.exit(1)
+            if result is None:
+                logger.warning('Nothing added to database')
         except Exception:
             logger.exception('Error when parsing incoming email',
                              extra={'mail': mail.as_string()})
+            sys.exit(1)
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 805037c72d73..8f9af81163f9 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -29,6 +29,7 @@  import logging
 import re
 
 from django.contrib.auth.models import User
+from django.db.utils import IntegrityError
 from django.utils import six
 
 from patchwork.models import Comment
@@ -939,7 +940,14 @@  def parse_mail(mail, list_id=None):
         list_id (str): Mailing list ID
 
     Returns:
-        None
+        patch/cover letter/comment
+        Or None if nothing is found in the mail
+                   or X-P-H: ignore
+                   or project not found
+
+    Raises:
+        ValueError if there is an error in parsing or a duplicate mail
+        Other truly unexpected issues may bubble up from the DB.
     """
     # some basic sanity checks
     if 'From' not in mail:
@@ -1001,20 +1009,24 @@  def parse_mail(mail, list_id=None):
             filenames = find_filenames(diff)
             delegate = find_delegate_by_filename(project, filenames)
 
-        patch = Patch.objects.create(
-            msgid=msgid,
-            project=project,
-            patch_project=project,
-            name=name[:255],
-            date=date,
-            headers=headers,
-            submitter=author,
-            content=message,
-            diff=diff,
-            pull_url=pull_url,
-            delegate=delegate,
-            state=find_state(mail))
-        logger.debug('Patch saved')
+        try:
+            patch = Patch.objects.create(
+                msgid=msgid,
+                project=project,
+                patch_project=project,
+                name=name[:255],
+                date=date,
+                headers=headers,
+                submitter=author,
+                content=message,
+                diff=diff,
+                pull_url=pull_url,
+                delegate=delegate,
+                state=find_state(mail))
+            logger.debug('Patch saved')
+        except IntegrityError:
+            logger.error("Duplicate mail for message ID %s" % msgid)
+            return None
 
         # if we don't have a series marker, we will never have an existing
         # series to match against.
diff --git a/patchwork/tests/test_management.py b/patchwork/tests/test_management.py
index f548fce3b8e5..66f88dfa2698 100644
--- a/patchwork/tests/test_management.py
+++ b/patchwork/tests/test_management.py
@@ -40,30 +40,27 @@  class ParsemailTest(TestCase):
 
     def test_missing_project_path(self):
         path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=path)
+        call_command('parsemail', infile=path)
 
-        self.assertEqual(exc.exception.code, 1)
+        count = models.Patch.objects.all().count()
+        self.assertEqual(count, 0)
 
     def test_missing_project_stdin(self):
         path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
         sys.stdin.close()
         sys.stdin = open(path)
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=None)
+        call_command('parsemail', infile=None)
 
         sys.stdin.close()
-        self.assertEqual(exc.exception.code, 1)
+        count = models.Patch.objects.all().count()
+        self.assertEqual(count, 0)
 
     def test_valid_path(self):
         project = utils.create_project()
         utils.create_state()
 
         path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=path, list_id=project.listid)
-
-        self.assertEqual(exc.exception.code, 0)
+        call_command('parsemail', infile=path, list_id=project.listid)
 
         count = models.Patch.objects.filter(project=project.id).count()
         self.assertEqual(count, 1)
@@ -75,12 +72,9 @@  class ParsemailTest(TestCase):
         path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
         sys.stdin.close()
         sys.stdin = open(path)
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=None,
-                         list_id=project.listid)
+        call_command('parsemail', infile=None, list_id=project.listid)
 
         sys.stdin.close()
-        self.assertEqual(exc.exception.code, 0)
 
         count = models.Patch.objects.filter(project=project.id).count()
         self.assertEqual(count, 1)
@@ -90,10 +84,7 @@  class ParsemailTest(TestCase):
         utils.create_state()
 
         path = os.path.join(TEST_MAIL_DIR, '0013-with-utf8-body.mbox')
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=path, list_id=project.listid)
-
-        self.assertEqual(exc.exception.code, 0)
+        call_command('parsemail', infile=path, list_id=project.listid)
 
         count = models.Patch.objects.filter(project=project.id).count()
         self.assertEqual(count, 1)
@@ -105,15 +96,30 @@  class ParsemailTest(TestCase):
         path = os.path.join(TEST_MAIL_DIR, '0013-with-utf8-body.mbox')
         sys.stdin.close()
         sys.stdin = open(path)
-        with self.assertRaises(SystemExit) as exc:
-            call_command('parsemail', infile=None,
-                         list_id=project.listid)
+        call_command('parsemail', infile=None, list_id=project.listid)
 
-        self.assertEqual(exc.exception.code, 0)
+        count = models.Patch.objects.filter(project=project.id).count()
+        self.assertEqual(count, 1)
+
+    def test_dup_mail(self):
+        project = utils.create_project()
+        utils.create_state()
+
+        path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
+        call_command('parsemail', infile=path, list_id=project.listid)
 
         count = models.Patch.objects.filter(project=project.id).count()
         self.assertEqual(count, 1)
 
+        # the parser should return None, not throwing an exception
+        # as this is a pretty normal part of life on a busy site
+        call_command('parsemail', infile=path, list_id=project.listid)
+
+        # this would be lovely but doesn't work because we caused an error in
+        # the transaction and we have no way to reset it
+        # count = models.Patch.objects.filter(project=project.id).count()
+        # self.assertEqual(count, 1)
+
 
 class ParsearchiveTest(TestCase):
     def test_invalid_path(self):