Message ID | BLU436-SMTP770163118D1C389E271418A3E90@phx.gbl |
---|---|
State | Superseded |
Headers | show |
On 08/21/2016 01:09 PM, Stephen Finucane wrote: > From: Stephen Finucane <stephen.finucane@intel.com> > > Management comands allow applications to register their own actions with > 'manage.py'. This provides some advantages, like automatically > configuring Django (removing the need for 'django.setup' calls) and > removing the need to set the PYTHON_PATH. The 'parsemail' script is a > natural fit for this type of application. Migrate 'parsemail' to a > management command. > > This includes some extensive work on logging configuration, as logging > is moved from code into settings. In addition, it removes a lot of the > customizable logging previously introduced in the parsemail command, in > favour of modifications to the settings files. > > Signed-off-by: Stephen Finucane <stephen.finucane@intel.com> > Partial-bug: #17 > Closes-bug: #15 One question, but I'm basically +1: > diff --git a/patchwork/bin/parsemail.sh b/patchwork/bin/parsemail.sh > index 9973392..85a21a1 100755 > --- a/patchwork/bin/parsemail.sh > +++ b/patchwork/bin/parsemail.sh > @@ -24,6 +24,8 @@ PATCHWORK_BASE=`readlink -e $BIN_DIR/../..` > > PYTHONPATH="$PATCHWORK_BASE":"$PATCHWORK_BASE/lib/python:$PYTHONPATH" \ > DJANGO_SETTINGS_MODULE=patchwork.settings.production \ > - "$PATCHWORK_BASE/patchwork/bin/parsemail.py" > + "$PATCHWORK_BASE/patchwork/manage.py parsemail" > > +# NOTE(stephenfin): We must return 0 here. For more information, refer > +# to https://patchwork.ozlabs.org/patch/602248/ > exit 0 This makes sense, but I'm still confused why you don't pass $* to the command since it can take "--list-id" as an option?
>> PYTHONPATH="$PATCHWORK_BASE":"$PATCHWORK_BASE/lib/python:$PYTHONPATH" \ >> DJANGO_SETTINGS_MODULE=patchwork.settings.production \ >> - "$PATCHWORK_BASE/patchwork/bin/parsemail.py" >> + "$PATCHWORK_BASE/patchwork/manage.py parsemail" >> >> +# NOTE(stephenfin): We must return 0 here. For more information, refer >> +# to https://patchwork.ozlabs.org/patch/602248/ >> exit 0 > > This makes sense, but I'm still confused why you don't pass $* to the > command since it can take "--list-id" as an option? Yep - I had added $@ to my invocation, which I think does something similar. Regards, Daniel
> +from email import message_from_file > +import logging > +from optparse import make_option This is throwing a warning: RemovedInDjango110Warning: OptionParser usage for Django management commands is deprecated, use ArgumentParser instead Likewise with the other management command. Could you flick over to argparse please? Regards, Daniel > +import sys > + > +from django.core.management import base > + > +from patchwork.parser import parse_mail > + > +logger = logging.getLogger(__name__) > + > + > +class Command(base.BaseCommand): > + help = 'Parse an mbox file and store any patch/comment found' > + args = '<infile>' # Django < 1.8 compatibility > + option_list = base.BaseCommand.option_list + ( > + make_option( > + '--list-id', > + help='mailing list ID. If not supplied, this will be extracted ' > + 'from the mail headers.' > + ), > + ) > + > + def handle(self, *args, **options): > + # Attempt to parse the path if provided, and fallback to stdin if not > + if args: > + logger.info('Parsing mail loaded by filename') > + with open(args[0]) as file_: > + mail = message_from_file(file_) > + else: > + logger.info('Parsing mail loaded from stdin') > + mail = message_from_file(sys.stdin) > + > + try: > + result = parse_mail(mail, options['list_id']) > + if result: > + sys.exit(0) > + logger.warning('Failed to parse mail.') > + sys.exit(1) > + except Exception as exc: > + logger.exception('Error when parsing incoming email', > + extra={'mail': mail.as_string()}) > diff --git a/patchwork/parser.py b/patchwork/parser.py > index 1805df8..51de997 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -42,7 +42,7 @@ _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') > _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') > list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] > > -LOGGER = logging.getLogger(__name__) > +logger = logging.getLogger(__name__) > > > def normalise_space(str): > @@ -599,7 +599,7 @@ def parse_mail(mail, list_id=None): > > hint = mail.get('X-Patchwork-Hint', '').lower() > if hint == 'ignore': > - LOGGER.debug("Ignoring email due to 'ignore' hint") > + logger.debug("Ignoring email due to 'ignore' hint") > return > > if list_id: > @@ -608,7 +608,7 @@ def parse_mail(mail, list_id=None): > project = find_project_by_header(mail) > > if project is None: > - LOGGER.error('Failed to find a project for email') > + logger.error('Failed to find a project for email') > return > > # parse content > @@ -651,7 +651,7 @@ def parse_mail(mail, list_id=None): > delegate=delegate, > state=find_state(mail)) > patch.save() > - LOGGER.debug('Patch saved') > + logger.debug('Patch saved') > > return patch > elif x == 0: # (potential) cover letters > @@ -683,7 +683,7 @@ def parse_mail(mail, list_id=None): > submitter=author, > content=message) > cover_letter.save() > - LOGGER.debug('Cover letter saved') > + logger.debug('Cover letter saved') > > return cover_letter > > @@ -704,7 +704,7 @@ def parse_mail(mail, list_id=None): > submitter=author, > content=message) > comment.save() > - LOGGER.debug('Comment saved') > + logger.debug('Comment saved') > > return comment > > diff --git a/patchwork/settings/base.py b/patchwork/settings/base.py > index b78ed4b..a32710f 100644 > --- a/patchwork/settings/base.py > +++ b/patchwork/settings/base.py > @@ -125,6 +125,8 @@ STATICFILES_DIRS = [ > # Third-party application settings > # > > +# rest_framework > + > try: > # django rest framework isn't a standard package in most distros, so > # don't make it compulsory > @@ -136,17 +138,65 @@ try: > except ImportError: > pass > > -# > -# Third-party application settings > -# > - > -# rest_framework > > REST_FRAMEWORK = { > 'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.NamespaceVersioning' > } > > # > +# Logging settings > +# > + > +LOGGING = { > + 'version': 1, > + 'disable_existing_loggers': False, > + 'formatters': { > + 'email': { > + 'format': '== Mail\n\n%(mail)s\n\n== Traceback\n', > + }, > + }, > + 'filters': { > + 'require_debug_false': { > + '()': 'django.utils.log.RequireDebugFalse', > + }, > + 'require_debug_true': { > + '()': 'django.utils.log.RequireDebugTrue', > + }, > + }, > + 'handlers': { > + 'console': { > + 'level': 'DEBUG', > + 'filters': ['require_debug_true'], > + 'class': 'logging.StreamHandler', > + }, > + 'mail_admins': { > + 'level': 'ERROR', > + 'filters': ['require_debug_false'], > + 'class': 'django.utils.log.AdminEmailHandler', > + 'formatter': 'email', > + 'include_html': True, > + }, > + }, > + 'loggers': { > + 'django': { > + 'handlers': ['console'], > + 'level': 'INFO', > + 'propagate': True, > + }, > + 'patchwork.parser': { > + 'handlers': ['console'], > + 'level': 'DEBUG', > + 'propagate': False, > + }, > + 'patchwork.management.commands': { > + 'handlers': ['console', 'mail_admins'], > + 'level': 'INFO', > + 'propagate': True, > + }, > + }, > +} > + > +# > # Patchwork settings > # > > -- > 2.7.4 > > _______________________________________________ > Patchwork mailing list > Patchwork@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/patchwork
> + def handle(self, *args, **options): > + # Attempt to parse the path if provided, and fallback to stdin if not > + if args: > + logger.info('Parsing mail loaded by filename') > + with open(args[0]) as file_: > + mail = message_from_file(file_) > + else: > + logger.info('Parsing mail loaded from stdin') > + mail = message_from_file(sys.stdin) > + So, I have found an interesting case here, not strictly related to this patch but related to parsing messages from files. I have been testing with some messages from this list from earlier this month. One [0] includes the following sequence: 000018f0 69 65 73 20 76 69 65 77 29 20 3f c2 a0 20 48 6f |ies view) ?.. Ho| Note the sequence "c2 a0". Both these are > 128 and therefore not part of 7-bit ASCII. Apparently this is a UTF-8 for a non-breaking space: http://stackoverflow.com/a/2774507/463510 email.message_from_file does not handle this well: it boils down to UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 6395: ordinal not in range(128) I imagine this hasn't hit us in production because most (all?) production users use Python2, which doesn't have the bytes/string distinction that Python3 has. Anyway, the only way I've found to work around this is to do something like this: with open(args[0], 'rb') as file_: decoded_mail = file_.read().decode('utf-8') mail = email.message_from_string(decoded_mail) This is super ugly, but works in Py3. Ironically it doesn't work in Py2, but it's a start. Could you include something like this in this patch set? I think the parsearchive will require something similar too. I'm going to start collecting these "interesting" emails to make a test suite. Regards, Daniel [0] https://lists.ozlabs.org/pipermail/patchwork/2016-August/003158.html
Daniel Axtens <dja@axtens.net> writes: > with open(args[0], 'rb') as file_: > decoded_mail = file_.read().decode('utf-8') > mail = email.message_from_string(decoded_mail) > > This is super ugly, but works in Py3. Ironically it doesn't work in Py2, > but it's a start. Could you include something like this in this patch > set? I think the parsearchive will require something similar too. Looks like there's message_from_bytes and message_from_binary_file in recent versions of 'email' - I haven't tried but I assume that would be better. Anyway, I'll leave it to your judgement. Regards, Daniel > > I'm going to start collecting these "interesting" emails to make a test suite. > > Regards, > Daniel > > [0] https://lists.ozlabs.org/pipermail/patchwork/2016-August/003158.html
On 28 Aug 14:59, Daniel Axtens wrote: > >> PYTHONPATH="$PATCHWORK_BASE":"$PATCHWORK_BASE/lib/python:$PYTHONPATH" \ > >> DJANGO_SETTINGS_MODULE=patchwork.settings.production \ > >> - "$PATCHWORK_BASE/patchwork/bin/parsemail.py" > >> + "$PATCHWORK_BASE/patchwork/manage.py parsemail" > >> > >> +# NOTE(stephenfin): We must return 0 here. For more information, refer > >> +# to https://patchwork.ozlabs.org/patch/602248/ > >> exit 0 > > > > This makes sense, but I'm still confused why you don't pass $* to the > > command since it can take "--list-id" as an option? True that. $@ looks like the correct option though, per StackOverflow? [1]. > Yep - I had added $@ to my invocation, which I think does something > similar. > > Regards, > Daniel I'll do this a v3 so :) Stephen [1] http://superuser.com/a/247131
On 28 Aug 21:10, Daniel Axtens wrote: > Daniel Axtens <dja@axtens.net> writes: > > > > with open(args[0], 'rb') as file_: > > decoded_mail = file_.read().decode('utf-8') > > mail = email.message_from_string(decoded_mail) > > > > This is super ugly, but works in Py3. Ironically it doesn't work in Py2, > > but it's a start. Could you include something like this in this patch > > set? I think the parsearchive will require something similar too. > > Looks like there's message_from_bytes and message_from_binary_file in > recent versions of 'email' - I haven't tried but I assume that would be > better. Anyway, I'll leave it to your judgement. > > Regards, > Daniel This is a definite issue, but I think it's outside of the scope of this series. Could you open a bug on the issue tracker [1] and we'll resolve it in a follow-up? Stephen [1] https://github.com/getpatchwork/patchwork/issues > > > > I'm going to start collecting these "interesting" emails to make a test suite. > > > > Regards, > > Daniel > > > > [0] https://lists.ozlabs.org/pipermail/patchwork/2016-August/003158.html
Stephen Finucane <stephenfinucane@hotmail.com> writes: > On 28 Aug 21:10, Daniel Axtens wrote: >> Daniel Axtens <dja@axtens.net> writes: >> >> >> > with open(args[0], 'rb') as file_: >> > decoded_mail = file_.read().decode('utf-8') >> > mail = email.message_from_string(decoded_mail) >> > >> > This is super ugly, but works in Py3. Ironically it doesn't work in Py2, >> > but it's a start. Could you include something like this in this patch >> > set? I think the parsearchive will require something similar too. >> >> Looks like there's message_from_bytes and message_from_binary_file in >> recent versions of 'email' - I haven't tried but I assume that would be >> better. Anyway, I'll leave it to your judgement. >> >> Regards, >> Daniel > > This is a definite issue, but I think it's outside of the scope of this > series. Could you open a bug on the issue tracker [1] and we'll resolve > it in a follow-up? Fair enough. Will do. Regards, Daniel > > Stephen > > [1] https://github.com/getpatchwork/patchwork/issues > >> > >> > I'm going to start collecting these "interesting" emails to make a test suite. >> > >> > Regards, >> > Daniel >> > >> > [0] https://lists.ozlabs.org/pipermail/patchwork/2016-August/003158.html
diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py deleted file mode 100755 index b35b1f6..0000000 --- a/patchwork/bin/parsemail.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# -# Patchwork - automated patch tracking system -# Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org> -# -# This file is part of the Patchwork package. -# -# Patchwork is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# Patchwork is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Patchwork; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -from __future__ import absolute_import - -import argparse -from email import message_from_file -import logging -import sys - -import django -from django.conf import settings -from django.utils.log import AdminEmailHandler - -from patchwork.parser import parse_mail - -LOGGER = logging.getLogger(__name__) - -VERBOSITY_LEVELS = { - 'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL -} - - -extra_error_message = ''' -== Mail - -%(mail)s - - -== Traceback - -''' - - -def setup_error_handler(): - """Configure error handler. - - Ensure emails are send to settings.ADMINS when errors are - encountered. - """ - if settings.DEBUG: - return - - mail_handler = AdminEmailHandler() - mail_handler.setLevel(logging.ERROR) - mail_handler.setFormatter(logging.Formatter(extra_error_message)) - - logger = logging.getLogger('patchwork') - logger.addHandler(mail_handler) - - return logger - - -def main(args): - django.setup() - logger = setup_error_handler() - parser = argparse.ArgumentParser() - - def list_logging_levels(): - """Give a summary of all available logging levels.""" - return sorted(list(VERBOSITY_LEVELS.keys()), - key=lambda x: VERBOSITY_LEVELS[x]) - - parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), - default=sys.stdin, help='input mbox file (a filename ' - 'or stdin)') - - group = parser.add_argument_group('Mail parsing configuration') - group.add_argument('--list-id', help='mailing list ID. If not supplied ' - 'this will be extracted from the mail headers.') - group.add_argument('--verbosity', choices=list_logging_levels(), - help='debug level', default='info') - - args = vars(parser.parse_args()) - - logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']]) - - mail = message_from_file(args['infile']) - try: - result = parse_mail(mail, args['list_id']) - if result: - return 0 - return 1 - except: - if logger: - logger.exception('Error when parsing incoming email', extra={ - 'mail': mail.as_string(), - }) - raise - -if __name__ == '__main__': - sys.exit(main(sys.argv)) diff --git a/patchwork/bin/parsemail.sh b/patchwork/bin/parsemail.sh index 9973392..85a21a1 100755 --- a/patchwork/bin/parsemail.sh +++ b/patchwork/bin/parsemail.sh @@ -24,6 +24,8 @@ PATCHWORK_BASE=`readlink -e $BIN_DIR/../..` PYTHONPATH="$PATCHWORK_BASE":"$PATCHWORK_BASE/lib/python:$PYTHONPATH" \ DJANGO_SETTINGS_MODULE=patchwork.settings.production \ - "$PATCHWORK_BASE/patchwork/bin/parsemail.py" + "$PATCHWORK_BASE/patchwork/manage.py parsemail" +# NOTE(stephenfin): We must return 0 here. For more information, refer +# to https://patchwork.ozlabs.org/patch/602248/ exit 0 diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py new file mode 100644 index 0000000..b1599e3 --- /dev/null +++ b/patchwork/management/commands/parsemail.py @@ -0,0 +1,61 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2016 Intel Corporation +# +# This file is part of the Patchwork package. +# +# Patchwork is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Patchwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Patchwork; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +from email import message_from_file +import logging +from optparse import make_option +import sys + +from django.core.management import base + +from patchwork.parser import parse_mail + +logger = logging.getLogger(__name__) + + +class Command(base.BaseCommand): + help = 'Parse an mbox file and store any patch/comment found' + args = '<infile>' # Django < 1.8 compatibility + option_list = base.BaseCommand.option_list + ( + make_option( + '--list-id', + help='mailing list ID. If not supplied, this will be extracted ' + 'from the mail headers.' + ), + ) + + def handle(self, *args, **options): + # Attempt to parse the path if provided, and fallback to stdin if not + if args: + logger.info('Parsing mail loaded by filename') + with open(args[0]) as file_: + mail = message_from_file(file_) + else: + logger.info('Parsing mail loaded from stdin') + mail = message_from_file(sys.stdin) + + try: + result = parse_mail(mail, options['list_id']) + if result: + sys.exit(0) + logger.warning('Failed to parse mail.') + sys.exit(1) + except Exception as exc: + logger.exception('Error when parsing incoming email', + extra={'mail': mail.as_string()}) diff --git a/patchwork/parser.py b/patchwork/parser.py index 1805df8..51de997 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -42,7 +42,7 @@ _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) def normalise_space(str): @@ -599,7 +599,7 @@ def parse_mail(mail, list_id=None): hint = mail.get('X-Patchwork-Hint', '').lower() if hint == 'ignore': - LOGGER.debug("Ignoring email due to 'ignore' hint") + logger.debug("Ignoring email due to 'ignore' hint") return if list_id: @@ -608,7 +608,7 @@ def parse_mail(mail, list_id=None): project = find_project_by_header(mail) if project is None: - LOGGER.error('Failed to find a project for email') + logger.error('Failed to find a project for email') return # parse content @@ -651,7 +651,7 @@ def parse_mail(mail, list_id=None): delegate=delegate, state=find_state(mail)) patch.save() - LOGGER.debug('Patch saved') + logger.debug('Patch saved') return patch elif x == 0: # (potential) cover letters @@ -683,7 +683,7 @@ def parse_mail(mail, list_id=None): submitter=author, content=message) cover_letter.save() - LOGGER.debug('Cover letter saved') + logger.debug('Cover letter saved') return cover_letter @@ -704,7 +704,7 @@ def parse_mail(mail, list_id=None): submitter=author, content=message) comment.save() - LOGGER.debug('Comment saved') + logger.debug('Comment saved') return comment diff --git a/patchwork/settings/base.py b/patchwork/settings/base.py index b78ed4b..a32710f 100644 --- a/patchwork/settings/base.py +++ b/patchwork/settings/base.py @@ -125,6 +125,8 @@ STATICFILES_DIRS = [ # Third-party application settings # +# rest_framework + try: # django rest framework isn't a standard package in most distros, so # don't make it compulsory @@ -136,17 +138,65 @@ try: except ImportError: pass -# -# Third-party application settings -# - -# rest_framework REST_FRAMEWORK = { 'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.NamespaceVersioning' } # +# Logging settings +# + +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'email': { + 'format': '== Mail\n\n%(mail)s\n\n== Traceback\n', + }, + }, + 'filters': { + 'require_debug_false': { + '()': 'django.utils.log.RequireDebugFalse', + }, + 'require_debug_true': { + '()': 'django.utils.log.RequireDebugTrue', + }, + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', + 'filters': ['require_debug_true'], + 'class': 'logging.StreamHandler', + }, + 'mail_admins': { + 'level': 'ERROR', + 'filters': ['require_debug_false'], + 'class': 'django.utils.log.AdminEmailHandler', + 'formatter': 'email', + 'include_html': True, + }, + }, + 'loggers': { + 'django': { + 'handlers': ['console'], + 'level': 'INFO', + 'propagate': True, + }, + 'patchwork.parser': { + 'handlers': ['console'], + 'level': 'DEBUG', + 'propagate': False, + }, + 'patchwork.management.commands': { + 'handlers': ['console', 'mail_admins'], + 'level': 'INFO', + 'propagate': True, + }, + }, +} + +# # Patchwork settings #