diff mbox series

[v2,2/2] parser: Handle IntegrityError for cover letters, comments

Message ID 20180920205836.13163-2-stephen@that.guru
State Accepted
Headers show
Series [v2,1/2] parsearchive: Fix logging | expand

Commit Message

Stephen Finucane Sept. 20, 2018, 8:58 p.m. UTC
This was already done for patches but cover letters and comments were
not handled correctly, resulting in errors while parsing archives. While
we're here, we slightly modify how these exceptions are handled. Rather
than simply ignoring them, as we were doing, we raise a custom
exception. This allows us to specifically identify these types of
exceptions, print a log and still skip them (which we want, as seen in
commit d2eb1f6d2).

While we're here, we change from separate create-save calls to a
combined create-save call for both created CoverLetter and Comment
objects. We were already doing this for patches.

Signed-off-by: Stephen Finucane <stephen@that.guru>
Cc: Daniel Axtens <dja@axtens.net>
---
v2:
- Re-add some logging removed in d2eb1f6d2
- Start logging the actual exception raised for errors
---
 patchwork/management/commands/parsearchive.py | 15 ++++--
 patchwork/management/commands/parsemail.py    |  8 ++-
 patchwork/parser.py                           | 50 ++++++++++++-------
 3 files changed, 48 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
index b4d8bcce..b1a6572e 100644
--- a/patchwork/management/commands/parsearchive.py
+++ b/patchwork/management/commands/parsearchive.py
@@ -12,6 +12,7 @@  from django.core.management.base import BaseCommand
 
 from patchwork import models
 from patchwork.parser import parse_mail
+from patchwork.parser import DuplicateMailError
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +35,7 @@  class Command(BaseCommand):
             models.CoverLetter: 0,
             models.Comment: 0,
         }
+        duplicates = 0
         dropped = 0
         errors = 0
 
@@ -90,10 +92,13 @@  class Command(BaseCommand):
                     results[type(obj)] += 1
                 else:
                     dropped += 1
-            except ValueError:
-                # TODO(stephenfin): Perhaps we should store the broken patch
-                # somewhere for future reference?
+            except DuplicateMailError as exc:
+                duplicates += 1
+                logger.warning('Duplicate mail for message ID %s', exc.msgid)
+            except (ValueError, Exception) as exc:
                 errors += 1
+                logger.warning('Invalid mail: %s', exc.message,
+                               extra={'mail': mail.as_string()})
 
             if verbosity == 1 and (i % 10) == 0:
                 self.stdout.write('%06d/%06d\r' % (i, count), ending='')
@@ -109,6 +114,7 @@  class Command(BaseCommand):
             '  %(covers)4d cover letters\n'
             '  %(patches)4d patches\n'
             '  %(comments)4d comments\n'
+            '  %(duplicates)4d duplicates\n'
             '  %(dropped)4d dropped\n'
             '  %(errors)4d errors\n'
             'Total: %(new)s new entries' % {
@@ -116,7 +122,8 @@  class Command(BaseCommand):
                 'covers': results[models.CoverLetter],
                 'patches': results[models.Patch],
                 'comments': results[models.Comment],
+                'duplicates': duplicates,
                 'dropped': dropped,
                 'errors': errors,
-                'new': count - dropped - errors,
+                'new': count - duplicates - dropped - errors,
             })
diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py
index a7ec97ff..b0871d21 100644
--- a/patchwork/management/commands/parsemail.py
+++ b/patchwork/management/commands/parsemail.py
@@ -11,6 +11,7 @@  from django.core.management import base
 from django.utils import six
 
 from patchwork.parser import parse_mail
+from patchwork.parser import DuplicateMailError
 
 logger = logging.getLogger(__name__)
 
@@ -65,7 +66,10 @@  class Command(base.BaseCommand):
             result = parse_mail(mail, options['list_id'])
             if result is None:
                 logger.warning('Nothing added to database')
-        except Exception:
-            logger.exception('Error when parsing incoming email',
+        except DuplicateMailError as exc:
+            logger.warning('Duplicate mail for message ID %s', exc.msgid)
+        except (ValueError, Exception) as exc:
+            logger.exception('Error when parsing incoming email: %s',
+                             exc.message,
                              extra={'mail': mail.as_string()})
             sys.exit(1)
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 4ede8255..611db39c 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -41,6 +41,12 @@  SERIES_DELAY_INTERVAL = 10
 logger = logging.getLogger(__name__)
 
 
+class DuplicateMailError(Exception):
+
+    def __init__(self, msgid):
+        self.msgid = msgid
+
+
 def normalise_space(value):
     whitespace_re = re.compile(r'\s+')
     return whitespace_re.sub(' ', value).strip()
@@ -1014,8 +1020,7 @@  def parse_mail(mail, list_id=None):
                 state=find_state(mail))
             logger.debug('Patch saved')
         except IntegrityError:
-            logger.error("Duplicate mail for message ID %s" % msgid)
-            return None
+            raise DuplicateMailError(msgid=msgid)
 
         # if we don't have a series marker, we will never have an existing
         # series to match against.
@@ -1121,15 +1126,18 @@  def parse_mail(mail, list_id=None):
                     logger.error("Multiple SeriesReferences for %s"
                                  " in project %s!" % (msgid, project.name))
 
-            cover_letter = CoverLetter(
-                msgid=msgid,
-                project=project,
-                name=name[:255],
-                date=date,
-                headers=headers,
-                submitter=author,
-                content=message)
-            cover_letter.save()
+            try:
+                cover_letter = CoverLetter.objects.create(
+                    msgid=msgid,
+                    project=project,
+                    name=name[:255],
+                    date=date,
+                    headers=headers,
+                    submitter=author,
+                    content=message)
+            except IntegrityError:
+                raise DuplicateMailError(msgid=msgid)
+
             logger.debug('Cover letter saved')
 
             series.add_cover_letter(cover_letter)
@@ -1145,14 +1153,18 @@  def parse_mail(mail, list_id=None):
 
     author = get_or_create_author(mail)
 
-    comment = Comment(
-        submission=submission,
-        msgid=msgid,
-        date=date,
-        headers=headers,
-        submitter=author,
-        content=message)
-    comment.save()
+
+    try:
+        comment = Comment.objects.create(
+            submission=submission,
+            msgid=msgid,
+            date=date,
+            headers=headers,
+            submitter=author,
+            content=message)
+    except IntegrityError:
+        raise DuplicateMailError(msgid=msgid)
+
     logger.debug('Comment saved')
 
     return comment