From patchwork Thu Sep 20 20:58:35 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stephen Finucane X-Patchwork-Id: 972737 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [203.11.71.2]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42GTcS5RSjz9s55 for ; Fri, 21 Sep 2018 06:59:20 +1000 (AEST) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="VRzb1Udb"; dkim-atps=neutral Received: from bilbo.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) by lists.ozlabs.org (Postfix) with ESMTP id 42GTcS3rMnzF3P8 for ; Fri, 21 Sep 2018 06:59:20 +1000 (AEST) Authentication-Results: lists.ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: lists.ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="VRzb1Udb"; dkim-atps=neutral X-Original-To: patchwork@lists.ozlabs.org Delivered-To: patchwork@lists.ozlabs.org Authentication-Results: lists.ozlabs.org; spf=none (mailfrom) smtp.mailfrom=that.guru (client-ip=185.234.75.8; helo=relay001.mxroute.com; envelope-from=stephen@that.guru; receiver=) Authentication-Results: lists.ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: lists.ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="VRzb1Udb"; dkim-atps=neutral Received: from relay001.mxroute.com (unknown [185.234.75.8]) (using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 42GTcM55LkzF3Nx for ; Fri, 21 Sep 2018 06:59:15 +1000 (AEST) Received: from filter002.mxroute.com (unknown [185.133.192.179]) by relay001.mxroute.com (Postfix) with ESMTP id 751F03F0F3 for ; Thu, 20 Sep 2018 20:58:42 +0000 (UTC) Received: from one.mxroute.com (one.mxroute.com [195.201.59.211]) by filter002.mxroute.com (Postfix) with ESMTPS id 2BA373F04D for ; Thu, 20 Sep 2018 20:58:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=that.guru; s=default; h=Message-Id:Date:Subject:Cc:To:From:Sender:Reply-To:MIME-Version :Content-Type:Content-Transfer-Encoding:Content-ID:Content-Description: Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID: In-Reply-To:References:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=+fU3xpigU+lTT0bOmnbr1hJhD/RD6ptq61av3EapMl8=; b=VRzb1UdbALtObJw2S1rKDoEFkk Qjo5y5xy87VXfZdm7yUAEr/RogGgp4h/iVk5fC9o3oj1HFeSFPgrugFakmXCGnVlajoVNIXn6/w5Q Oyh/QbVGlebBBBr6Ln57eclBqTG+hR9YuFafnivh8GlsxbWTnjKTZ+zdwm5qVyY83nZ/lbPehA4vC 6w02eNnTR6Zmuv8/r9mbrQrIId6+lVmSzBnlUQnjDcRCWZPTQtko/uXIZqvlim/f4W660x0Ekk3lv L89ZY2+vJGh/zNi/mAkRG0uKEKDwIPgXPKzqWEjrwiaqyC3DFyC0bBP4RaDKa8s1OyluZr3TMqW7J tX1bOTiw==; From: Stephen Finucane To: patchwork@lists.ozlabs.org Subject: [PATCH v2 1/2] parsearchive: Fix logging Date: Thu, 20 Sep 2018 21:58:35 +0100 Message-Id: <20180920205836.13163-1-stephen@that.guru> X-Mailer: git-send-email 2.17.1 X-AuthUser: stephen@that.guru X-BeenThere: patchwork@lists.ozlabs.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Patchwork development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Patchwork" We should use a counter normally, avoid using the counter and emit logs when more detailed output is requested, and emit nothing when no output is requested. In addition, the default logging level for the parser module is set to 'WARNING' to make it less chatty. Signed-off-by: Stephen Finucane --- v2: - Set 'WARNING' level logging by default for all parsing activities - Make more use of loggers rather than stdout --- patchwork/management/commands/parsearchive.py | 26 ++++++++++++++++--- patchwork/settings/base.py | 6 ++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index 96f64fa7..b4d8bcce 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -37,10 +37,24 @@ class Command(BaseCommand): dropped = 0 errors = 0 + verbosity = int(options['verbosity']) + if not verbosity: + level = logging.CRITICAL + elif verbosity == 1: + level = logging.ERROR + elif verbosity == 2: + level = logging.INFO + else: # verbosity == 3 + level = logging.DEBUG + + if level: + logger.setLevel(level) + logging.getLogger('patchwork.parser').setLevel(level) + # TODO(stephenfin): Support passing via stdin path = args and args[0] or options['infile'] if not os.path.exists(path): - self.stdout.write('Invalid path: %s' % path) + logger.error('Invalid path: %s', path) sys.exit(1) # assume if is a directory, then we're passing a maildir @@ -65,7 +79,7 @@ class Command(BaseCommand): for m in mbox: pass except AttributeError: - logger.warning('Broken mbox/Maildir, aborting') + logger.error('Broken mbox/Maildir, aborting') return logger.info('Parsing %d mails', count) @@ -81,10 +95,15 @@ class Command(BaseCommand): # somewhere for future reference? errors += 1 - if (i % 10) == 0: + if verbosity == 1 and (i % 10) == 0: self.stdout.write('%06d/%06d\r' % (i, count), ending='') self.stdout.flush() + mbox.close() + + if not verbosity: + return + self.stdout.write( 'Processed %(total)d messages -->\n' ' %(covers)4d cover letters\n' @@ -101,4 +120,3 @@ class Command(BaseCommand): 'errors': errors, 'new': count - dropped - errors, }) - mbox.close() diff --git a/patchwork/settings/base.py b/patchwork/settings/base.py index 16ca712a..3eb1f0e2 100644 --- a/patchwork/settings/base.py +++ b/patchwork/settings/base.py @@ -177,17 +177,17 @@ LOGGING = { 'loggers': { 'django': { 'handlers': ['console'], - 'level': 'INFO', + 'level': 'WARNING', 'propagate': True, }, 'patchwork.parser': { 'handlers': ['console'], - 'level': 'DEBUG', + 'level': 'WARNING', 'propagate': False, }, 'patchwork.management.commands': { 'handlers': ['console', 'mail_admins'], - 'level': 'INFO', + 'level': 'WARNING', 'propagate': True, }, }, From patchwork Thu Sep 20 20:58:36 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stephen Finucane X-Patchwork-Id: 972738 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42GTct0KR4z9s55 for ; Fri, 21 Sep 2018 06:59:42 +1000 (AEST) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="BSJIJZ5S"; dkim-atps=neutral Received: from bilbo.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) by lists.ozlabs.org (Postfix) with ESMTP id 42GTcs5whHzF3PH for ; Fri, 21 Sep 2018 06:59:41 +1000 (AEST) Authentication-Results: lists.ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: lists.ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="BSJIJZ5S"; dkim-atps=neutral X-Original-To: patchwork@lists.ozlabs.org Delivered-To: patchwork@lists.ozlabs.org Authentication-Results: lists.ozlabs.org; spf=none (mailfrom) smtp.mailfrom=that.guru (client-ip=185.234.75.6; helo=relay-direct6.mxroute.com; envelope-from=stephen@that.guru; receiver=) Authentication-Results: lists.ozlabs.org; dmarc=none (p=none dis=none) header.from=that.guru Authentication-Results: lists.ozlabs.org; dkim=fail reason="key not found in DNS" (0-bit key; unprotected) header.d=that.guru header.i=@that.guru header.b="BSJIJZ5S"; dkim-atps=neutral Received: from relay-direct6.mxroute.com (relay-direct6.mxroute.com [185.234.75.6]) (using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 42GTcN1LT9zF3P0 for ; Fri, 21 Sep 2018 06:59:16 +1000 (AEST) Received: from filter002.mxroute.com (unknown [185.133.192.179]) by relay-direct6.mxroute.com (Postfix) with ESMTP id ECF533F0FC; Thu, 20 Sep 2018 20:58:42 +0000 (UTC) Received: from one.mxroute.com (one.mxroute.com [195.201.59.211]) by filter002.mxroute.com (Postfix) with ESMTPS id D5FA93F04D; Thu, 20 Sep 2018 20:58:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=that.guru; s=default; h=References:In-Reply-To:Message-Id:Date:Subject:Cc:To:From: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=yYDyNFriRrwMmez9jknLXD/upkJJGLgkR+POyDf+yY0=; b=BSJIJZ5S2ivU3AFpPkMIJe0TRS iglH1RMDtgL4NWm7nUmPy8UuALPUN03mAHKRX4q62QxbJAA38JjVIDNqIFaZvr26WfgyGOt6KLJ43 xkZ21Wdq+teL6peRvapHgFurOzn9v2e1B6ykDdYGdWrwRbavr54C0A03QmxgwEP53SvNnybb2XedJ +dnnWmLzfZHnCg6s+XJ8oZ6ESVLonMjnKS0VkNE2fbN4V6vyOW2Lt3xfkgQzKhYoklZBDiB7fOJtL l4wkVGqi+2MNUAJ65/uYLWwC/PQVfLMQY0BzuNc2FVitbrP5qJsCD02guC2PzjVsJoDeiaMkEXXrj RuftoJAQ==; From: Stephen Finucane To: patchwork@lists.ozlabs.org Subject: [PATCH v2 2/2] parser: Handle IntegrityError for cover letters, comments Date: Thu, 20 Sep 2018 21:58:36 +0100 Message-Id: <20180920205836.13163-2-stephen@that.guru> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20180920205836.13163-1-stephen@that.guru> References: <20180920205836.13163-1-stephen@that.guru> X-AuthUser: stephen@that.guru X-BeenThere: patchwork@lists.ozlabs.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Patchwork development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Patchwork" This was already done for patches but cover letters and comments were not handled correctly, resulting in errors while parsing archives. While we're here, we slightly modify how these exceptions are handle. Rather than simply ignoring them, as we were doing, we raise a custom exception. This allows us to specifically identify these types of exceptions, print a log and still skip them (which we want, as seen in commit d2eb1f6d2). While we're here, we change from separate create-save calls to a combined create-save call for both created CoverLetter and Comment objects. We were already doing this for patches. Signed-off-by: Stephen Finucane Cc: Daniel Axtens --- v2: - Re-add some logging removed in d2eb1f6d2 - Start logging the actual exception raised for errors --- patchwork/management/commands/parsearchive.py | 15 ++++-- patchwork/management/commands/parsemail.py | 8 ++- patchwork/parser.py | 50 ++++++++++++------- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index b4d8bcce..b1a6572e 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -12,6 +12,7 @@ from django.core.management.base import BaseCommand from patchwork import models from patchwork.parser import parse_mail +from patchwork.parser import DuplicateMailError logger = logging.getLogger(__name__) @@ -34,6 +35,7 @@ class Command(BaseCommand): models.CoverLetter: 0, models.Comment: 0, } + duplicates = 0 dropped = 0 errors = 0 @@ -90,10 +92,13 @@ class Command(BaseCommand): results[type(obj)] += 1 else: dropped += 1 - except ValueError: - # TODO(stephenfin): Perhaps we should store the broken patch - # somewhere for future reference? + except DuplicateMailError as exc: + duplicates += 1 + logger.warning('Duplicate mail for message ID %s', exc.msgid) + except (ValueError, Exception) as exc: errors += 1 + logger.warning('Invalid mail: %s', exc.message, + extra={'mail': mail.as_string()}) if verbosity == 1 and (i % 10) == 0: self.stdout.write('%06d/%06d\r' % (i, count), ending='') @@ -109,6 +114,7 @@ class Command(BaseCommand): ' %(covers)4d cover letters\n' ' %(patches)4d patches\n' ' %(comments)4d comments\n' + ' %(duplicates)4d duplicates\n' ' %(dropped)4d dropped\n' ' %(errors)4d errors\n' 'Total: %(new)s new entries' % { @@ -116,7 +122,8 @@ class Command(BaseCommand): 'covers': results[models.CoverLetter], 'patches': results[models.Patch], 'comments': results[models.Comment], + 'duplicates': duplicates, 'dropped': dropped, 'errors': errors, - 'new': count - dropped - errors, + 'new': count - duplicates - dropped - errors, }) diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index a7ec97ff..b0871d21 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -11,6 +11,7 @@ from django.core.management import base from django.utils import six from patchwork.parser import parse_mail +from patchwork.parser import DuplicateMailError logger = logging.getLogger(__name__) @@ -65,7 +66,10 @@ class Command(base.BaseCommand): result = parse_mail(mail, options['list_id']) if result is None: logger.warning('Nothing added to database') - except Exception: - logger.exception('Error when parsing incoming email', + except DuplicateMailError as exc: + logger.warning('Duplicate mail for message ID %s', exc.msgid) + except (ValueError, Exception) as exc: + logger.exception('Error when parsing incoming email: %s', + exc.message, extra={'mail': mail.as_string()}) sys.exit(1) diff --git a/patchwork/parser.py b/patchwork/parser.py index 4ede8255..611db39c 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -41,6 +41,12 @@ SERIES_DELAY_INTERVAL = 10 logger = logging.getLogger(__name__) +class DuplicateMailError(Exception): + + def __init__(self, msgid): + self.msgid = msgid + + def normalise_space(value): whitespace_re = re.compile(r'\s+') return whitespace_re.sub(' ', value).strip() @@ -1014,8 +1020,7 @@ def parse_mail(mail, list_id=None): state=find_state(mail)) logger.debug('Patch saved') except IntegrityError: - logger.error("Duplicate mail for message ID %s" % msgid) - return None + raise DuplicateMailError(msgid=msgid) # if we don't have a series marker, we will never have an existing # series to match against. @@ -1121,15 +1126,18 @@ def parse_mail(mail, list_id=None): logger.error("Multiple SeriesReferences for %s" " in project %s!" % (msgid, project.name)) - cover_letter = CoverLetter( - msgid=msgid, - project=project, - name=name[:255], - date=date, - headers=headers, - submitter=author, - content=message) - cover_letter.save() + try: + cover_letter = CoverLetter.objects.create( + msgid=msgid, + project=project, + name=name[:255], + date=date, + headers=headers, + submitter=author, + content=message) + except IntegrityError: + raise DuplicateMailError(msgid=msgid) + logger.debug('Cover letter saved') series.add_cover_letter(cover_letter) @@ -1145,14 +1153,18 @@ def parse_mail(mail, list_id=None): author = get_or_create_author(mail) - comment = Comment( - submission=submission, - msgid=msgid, - date=date, - headers=headers, - submitter=author, - content=message) - comment.save() + + try: + comment = Comment.objects.create( + submission=submission, + msgid=msgid, + date=date, + headers=headers, + submitter=author, + content=message) + except IntegrityError: + raise DuplicateMailError(msgid=msgid) + logger.debug('Comment saved') return comment