Message ID | 1468402860-3409-11-git-send-email-stephen.finucane@intel.com |
---|---|
State | Superseded |
Headers | show |
On 07/13/2016 04:40 AM, Stephen Finucane wrote: > As with parsemail, parsearchive makes more sense as a management > command. Make it so. > > As with the conversion of the 'parsemail' tool, this removes > customisable logging as it's not necessary. > > Signed-off-by: Stephen Finucane <stephen.finucane@intel.com> Reviewed-by: Andy Doan <andy.doan@linaro.org> > Closes-bug: #17 > --- > docs/development.md | 8 +- > patchwork/bin/parsearchive.py | 106 ------------------------- > patchwork/management/commands/parsearchive.py | 87 ++++++++++++++++++++ > 3 files changed, 91 insertions(+), 110 deletions(-) > delete mode 100755 patchwork/bin/parsearchive.py > create mode 100644 patchwork/management/commands/parsearchive.py > > diff --git a/docs/development.md b/docs/development.md > index de5dac5..4a6b994 100644 > --- a/docs/development.md > +++ b/docs/development.md > @@ -192,8 +192,8 @@ using the aptly-named `createsuperuser` command: > > Once this is done, it's beneficial to load some real emails into the system. > This can be done manually, however it's generally much easier to download > -an archive from a Mailman instance and load these using the `parsearchive.py` > -tool. You can do this like so: > +an archive from a Mailman instance and load these using the `parsearchive` > +command. You can do this like so: > > (.venv)$ mm_user=myusername > (.venv)$ mm_pass=mypassword > @@ -212,8 +212,8 @@ find more informations about this [here][ref-mman-bulk]. > Load these archives into Patchwork. Depending on the size of the downloaded > archives this may take some time: > > - (.venv)$ PYTHONPATH=. 
./patchwork/bin/parsearchive.py \ > - --list-id=patchwork.ozlabs.org patchwork.mbox > + (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \ > + patchwork.mbox > > Finally, run the server and browse to the IP address of your board using your > browser of choice: > diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py > deleted file mode 100755 > index 8986b22..0000000 > --- a/patchwork/bin/parsearchive.py > +++ /dev/null > @@ -1,106 +0,0 @@ > -#!/usr/bin/env python > -# > -# Patchwork - automated patch tracking system > -# Copyright (C) 2015 Intel Corporation > -# > -# This file is part of the Patchwork package. > -# > -# Patchwork is free software; you can redistribute it and/or modify > -# it under the terms of the GNU General Public License as published by > -# the Free Software Foundation; either version 2 of the License, or > -# (at your option) any later version. > -# > -# Patchwork is distributed in the hope that it will be useful, > -# but WITHOUT ANY WARRANTY; without even the implied warranty of > -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > -# GNU General Public License for more details. 
> -# > -# You should have received a copy of the GNU General Public License > -# along with Patchwork; if not, write to the Free Software > -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > - > -"""Utility to parse an mbox archive file.""" > - > -from __future__ import absolute_import > - > -import argparse > -import logging > -import mailbox > - > -import django > - > -from patchwork.parser import parse_mail > -from patchwork import models > - > -LOGGER = logging.getLogger(__name__) > - > -VERBOSITY_LEVELS = { > - 'debug': logging.DEBUG, > - 'info': logging.INFO, > - 'warning': logging.WARNING, > - 'error': logging.ERROR, > - 'critical': logging.CRITICAL > -} > - > - > -def parse_mbox(path, list_id): > - results = { > - models.Patch: 0, > - models.CoverLetter: 0, > - models.Comment: 0, > - } > - duplicates = 0 > - dropped = 0 > - > - mbox = mailbox.mbox(path) > - for msg in mbox: > - try: > - obj = parse_mail(msg, list_id) > - if obj: > - results[type(obj)] += 1 > - else: > - dropped += 1 > - except django.db.utils.IntegrityError: > - duplicates += 1 > - print('Processed %(total)d messages -->\n' > - ' %(covers)4d cover letters\n' > - ' %(patches)4d patches\n' > - ' %(comments)4d comments\n' > - ' %(duplicates)4d duplicates\n' > - ' %(dropped)4d dropped\n' > - 'Total: %(new)s new entries' % { > - 'total': len(mbox), > - 'covers': results[models.CoverLetter], > - 'patches': results[models.Patch], > - 'comments': results[models.Comment], > - 'duplicates': duplicates, > - 'dropped': dropped, > - 'new': len(mbox) - duplicates - dropped, > - }) > - > - > -def main(): > - django.setup() > - parser = argparse.ArgumentParser(description=__doc__) > - > - def list_logging_levels(): > - """Give a summary of all available logging levels.""" > - return sorted(VERBOSITY_LEVELS.keys(), > - key=lambda x: VERBOSITY_LEVELS[x]) > - > - parser.add_argument('inpath', help='input mbox filename') > - > - group = parser.add_argument_group('Mail parsing 
configuration') > - group.add_argument('--list-id', help='mailing list ID. If not supplied ' > - 'this will be extracted from the mail headers.') > - group.add_argument('--verbosity', choices=list_logging_levels(), > - help='debug level', default='info') > - > - args = vars(parser.parse_args()) > - > - logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']]) > - > - parse_mbox(args['inpath'], args['list_id']) > - > -if __name__ == '__main__': > - main() > diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py > new file mode 100644 > index 0000000..093129f > --- /dev/null > +++ b/patchwork/management/commands/parsearchive.py > @@ -0,0 +1,87 @@ > +# Patchwork - automated patch tracking system > +# Copyright (C) 2016 Intel Corporation > +# > +# This file is part of the Patchwork package. > +# > +# Patchwork is free software; you can redistribute it and/or modify > +# it under the terms of the GNU General Public License as published by > +# the Free Software Foundation; either version 2 of the License, or > +# (at your option) any later version. > +# > +# Patchwork is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. 
> +# > +# You should have received a copy of the GNU General Public License > +# along with Patchwork; if not, write to the Free Software > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + > +import logging > +import mailbox > +from optparse import make_option > + > +import django > +from django.core.management.base import BaseCommand > + > +from patchwork import models > +from patchwork.parser import parse_mail > + > +logger = logging.getLogger(__name__) > + > + > +class Command(BaseCommand): > + help = 'Parse an mbox archive file and store any patches/comments found' > + args = '<infile>' # Django < 1.8 compatibility > + option_list = BaseCommand.option_list + ( > + make_option( > + '--list-id', > + help='mailing list ID. If not supplied, this will be extracted ' > + 'from the mail headers.' > + ), > + ) > + > + def handle(self, *args, **options): > + results = { > + models.Patch: 0, > + models.CoverLetter: 0, > + models.Comment: 0, > + } > + duplicates = 0 > + dropped = 0 > + > + # TODO(stephenfin): Support passing via stdin? 
> + mbox = mailbox.mbox(args[0]) > + count = len(mbox) > + > + logger.info('Parsing %d mails', count) > + for i, msg in enumerate(mbox): > + try: > + obj = parse_mail(msg, options['list_id']) > + if obj: > + results[type(obj)] += 1 > + else: > + dropped += 1 > + except django.db.utils.IntegrityError: > + duplicates += 1 > + > + if (i % 10) == 0: > + self.stdout.write('%06d/%06d\r' % (i, count), ending='') > + self.stdout.flush() > + > + self.stdout.write( > + 'Processed %(total)d messages -->\n' > + ' %(covers)4d cover letters\n' > + ' %(patches)4d patches\n' > + ' %(comments)4d comments\n' > + ' %(duplicates)4d duplicates\n' > + ' %(dropped)4d dropped\n' > + 'Total: %(new)s new entries' % { > + 'total': count, > + 'covers': results[models.CoverLetter], > + 'patches': results[models.Patch], > + 'comments': results[models.Comment], > + 'duplicates': duplicates, > + 'dropped': dropped, > + 'new': count - duplicates - dropped, > + }) >
diff --git a/docs/development.md b/docs/development.md index de5dac5..4a6b994 100644 --- a/docs/development.md +++ b/docs/development.md @@ -192,8 +192,8 @@ using the aptly-named `createsuperuser` command: Once this is done, it's beneficial to load some real emails into the system. This can be done manually, however it's generally much easier to download -an archive from a Mailman instance and load these using the `parsearchive.py` -tool. You can do this like so: +an archive from a Mailman instance and load these using the `parsearchive` +command. You can do this like so: (.venv)$ mm_user=myusername (.venv)$ mm_pass=mypassword @@ -212,8 +212,8 @@ find more informations about this [here][ref-mman-bulk]. Load these archives into Patchwork. Depending on the size of the downloaded archives this may take some time: - (.venv)$ PYTHONPATH=. ./patchwork/bin/parsearchive.py \ - --list-id=patchwork.ozlabs.org patchwork.mbox + (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \ + patchwork.mbox Finally, run the server and browse to the IP address of your board using your browser of choice: diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py deleted file mode 100755 index 8986b22..0000000 --- a/patchwork/bin/parsearchive.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# -# Patchwork - automated patch tracking system -# Copyright (C) 2015 Intel Corporation -# -# This file is part of the Patchwork package. -# -# Patchwork is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# Patchwork is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with Patchwork; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Utility to parse an mbox archive file.""" - -from __future__ import absolute_import - -import argparse -import logging -import mailbox - -import django - -from patchwork.parser import parse_mail -from patchwork import models - -LOGGER = logging.getLogger(__name__) - -VERBOSITY_LEVELS = { - 'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL -} - - -def parse_mbox(path, list_id): - results = { - models.Patch: 0, - models.CoverLetter: 0, - models.Comment: 0, - } - duplicates = 0 - dropped = 0 - - mbox = mailbox.mbox(path) - for msg in mbox: - try: - obj = parse_mail(msg, list_id) - if obj: - results[type(obj)] += 1 - else: - dropped += 1 - except django.db.utils.IntegrityError: - duplicates += 1 - print('Processed %(total)d messages -->\n' - ' %(covers)4d cover letters\n' - ' %(patches)4d patches\n' - ' %(comments)4d comments\n' - ' %(duplicates)4d duplicates\n' - ' %(dropped)4d dropped\n' - 'Total: %(new)s new entries' % { - 'total': len(mbox), - 'covers': results[models.CoverLetter], - 'patches': results[models.Patch], - 'comments': results[models.Comment], - 'duplicates': duplicates, - 'dropped': dropped, - 'new': len(mbox) - duplicates - dropped, - }) - - -def main(): - django.setup() - parser = argparse.ArgumentParser(description=__doc__) - - def list_logging_levels(): - """Give a summary of all available logging levels.""" - return sorted(VERBOSITY_LEVELS.keys(), - key=lambda x: VERBOSITY_LEVELS[x]) - - parser.add_argument('inpath', help='input mbox filename') - - group = parser.add_argument_group('Mail parsing configuration') - group.add_argument('--list-id', help='mailing list ID. 
If not supplied ' - 'this will be extracted from the mail headers.') - group.add_argument('--verbosity', choices=list_logging_levels(), - help='debug level', default='info') - - args = vars(parser.parse_args()) - - logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']]) - - parse_mbox(args['inpath'], args['list_id']) - -if __name__ == '__main__': - main() diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py new file mode 100644 index 0000000..093129f --- /dev/null +++ b/patchwork/management/commands/parsearchive.py @@ -0,0 +1,87 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2016 Intel Corporation +# +# This file is part of the Patchwork package. +# +# Patchwork is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Patchwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Patchwork; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import logging +import mailbox +from optparse import make_option + +import django +from django.core.management.base import BaseCommand + +from patchwork import models +from patchwork.parser import parse_mail + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = 'Parse an mbox archive file and store any patches/comments found' + args = '<infile>' # Django < 1.8 compatibility + option_list = BaseCommand.option_list + ( + make_option( + '--list-id', + help='mailing list ID. 
If not supplied, this will be extracted ' + 'from the mail headers.' + ), + ) + + def handle(self, *args, **options): + results = { + models.Patch: 0, + models.CoverLetter: 0, + models.Comment: 0, + } + duplicates = 0 + dropped = 0 + + # TODO(stephenfin): Support passing via stdin? + mbox = mailbox.mbox(args[0]) + count = len(mbox) + + logger.info('Parsing %d mails', count) + for i, msg in enumerate(mbox): + try: + obj = parse_mail(msg, options['list_id']) + if obj: + results[type(obj)] += 1 + else: + dropped += 1 + except django.db.utils.IntegrityError: + duplicates += 1 + + if (i % 10) == 0: + self.stdout.write('%06d/%06d\r' % (i, count), ending='') + self.stdout.flush() + + self.stdout.write( + 'Processed %(total)d messages -->\n' + ' %(covers)4d cover letters\n' + ' %(patches)4d patches\n' + ' %(comments)4d comments\n' + ' %(duplicates)4d duplicates\n' + ' %(dropped)4d dropped\n' + 'Total: %(new)s new entries' % { + 'total': count, + 'covers': results[models.CoverLetter], + 'patches': results[models.Patch], + 'comments': results[models.Comment], + 'duplicates': duplicates, + 'dropped': dropped, + 'new': count - duplicates - dropped, + })
As with parsemail, parsearchive makes more sense as a management
command. Make it so.

As with the conversion of the 'parsemail' tool, this removes
customisable logging as it's not necessary.

Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
Closes-bug: #17
---
 docs/development.md                           |   8 +-
 patchwork/bin/parsearchive.py                 | 106 -------------------------
 patchwork/management/commands/parsearchive.py |  87 ++++++++++++++++++++
 3 files changed, 91 insertions(+), 110 deletions(-)
 delete mode 100755 patchwork/bin/parsearchive.py
 create mode 100644 patchwork/management/commands/parsearchive.py