diff mbox

Make parsemail-batch a managment command

Message ID 20170224042835.12728-1-dja@axtens.net
State Rejected
Headers show

Commit Message

Daniel Axtens Feb. 24, 2017, 4:28 a.m. UTC
This brings it into line with the other import commands.

It's also an order of magnitude or so faster.

Signed-off-by: Daniel Axtens <dja@axtens.net>

---

This isn't that useful per se, but I'm using it to benchmark
the performance impact of changes look for opportunities to
speed up.
---
 patchwork/bin/parsemail-batch.sh                 | 25 +++-----
 patchwork/management/commands/parsemail-batch.py | 77 ++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 17 deletions(-)
 create mode 100644 patchwork/management/commands/parsemail-batch.py

Comments

Stephen Finucane Feb. 25, 2017, 12:08 p.m. UTC | #1
On Fri, 2017-02-24 at 15:28 +1100, Daniel Axtens wrote:
> This brings it into line with the other import commands.
> 
> It's also an order of magnitude or so faster.
> 
> Signed-off-by: Daniel Axtens <dja@axtens.net>

I never got the difference between this and 'parsearchive'. Care to
enlighten me? :)

Stephen

> 
> ---
> 
> This isn't that useful per se, but I'm using it to benchmark
> the performance impact of changes look for opportunities to
> speed up.
> ---
>  patchwork/bin/parsemail-batch.sh                 | 25 +++-----
>  patchwork/management/commands/parsemail-batch.py | 77
> ++++++++++++++++++++++++
>  2 files changed, 85 insertions(+), 17 deletions(-)
>  create mode 100644 patchwork/management/commands/parsemail-batch.py
> 
> diff --git a/patchwork/bin/parsemail-batch.sh
> b/patchwork/bin/parsemail-batch.sh
> index d42712ed0995..f35808e9d11b 100755
> --- a/patchwork/bin/parsemail-batch.sh
> +++ b/patchwork/bin/parsemail-batch.sh
> @@ -20,25 +20,16 @@
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
> 1307  USA
>  
>  BIN_DIR=$(dirname "$0")
> +PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
>  
> -if [ $# -lt 1 ]; then
> -    echo "usage: $0 <dir> [options]" >&2
> -    exit 1
> +if [ -z "$PW_PYTHON" ]; then
> +    PW_PYTHON=python2
>  fi
>  
> -mail_dir="$1"
> -
> -echo "dir: $mail_dir"
> -
> -if [ ! -d "$mail_dir" ]; then
> -    echo "$mail_dir should be a directory"? >&2
> -    exit 1
> +if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
> +    DJANGO_SETTINGS_MODULE=patchwork.settings.production
>  fi
>  
> -shift
> -
> -find "$mail_dir" -maxdepth 1 |
> -while read -r line; do
> -    echo "$line"
> -    "$BIN_DIR/parsemail.sh" "$@" < "$line"
> -done
> +PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTHONPA
> TH" \
> +    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
> +    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch "$@"
> diff --git a/patchwork/management/commands/parsemail-batch.py
> b/patchwork/management/commands/parsemail-batch.py
> new file mode 100644
> index 000000000000..ad4dce35f595
> --- /dev/null
> +++ b/patchwork/management/commands/parsemail-batch.py
> @@ -0,0 +1,77 @@
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
> +#
> +# This file is part of the Patchwork package.
> +#
> +# Patchwork is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published
> by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# Patchwork is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +
> +import email
> +import logging
> +from optparse import make_option
> +import sys
> +import os
> +
> +import django
> +from django.core.management import base
> +from django.utils import six
> +
> +from patchwork.parser import parse_mail
> +
> +logger = logging.getLogger(__name__)
> +
> +
> +class Command(base.BaseCommand):
> +    help = 'Parse a directory of mbox files and store any
> patches/comments found.'
> +
> +    if django.VERSION < (1, 8):
> +        args = '<indir>'
> +        option_list = base.BaseCommand.option_list + (
> +            make_option(
> +                '--list-id',
> +                help='mailing list ID. If not supplied, this will be
> '
> +                'extracted from the mail headers.'),
> +        )
> +    else:
> +        def add_arguments(self, parser):
> +            parser.add_argument(
> +                'indir',
> +                type=str,
> +                default=None,
> +                help='input mbox directory')
> +            parser.add_argument(
> +                '--list-id',
> +                help='mailing list ID. If not supplied, this will be
> '
> +                'extracted from the mail headers.')
> +
> +    def handle(self, *args, **options):
> +        indir = args[0] if args else options['indir']
> +
> +        if not os.path.isdir(indir):
> +            logger.error('%s is not a directory' % indir)
> +            sys.exit(1)
> +
> +        for infile in sorted(os.listdir(indir)):
> +            print(infile)
> +            if six.PY3:
> +                with open(os.path.join(indir, infile), 'rb') as
> file_:
> +                    mail = email.message_from_binary_file(file_)
> +            else:
> +                with open(os.path.join(indir, infile)) as file_:
> +                    mail = email.message_from_file(file_)
> +
> +            try:
> +                result = parse_mail(mail, options['list_id'])
> +                if not result:
> +                    logger.warning('Failed to parse mail')
> +            except Exception as e:
> +                logger.warning('Error when parsing incoming email '
> + \
> +                               str(e),
> +                               extra={'mail': mail.as_string()})
Daniel Axtens Feb. 25, 2017, 11:56 p.m. UTC | #2
Stephen Finucane <stephen@that.guru> writes:

> On Fri, 2017-02-24 at 15:28 +1100, Daniel Axtens wrote:
>> This brings it into line with the other import commands.
>> 
>> It's also an order of magnitude or so faster.
>> 
>> Signed-off-by: Daniel Axtens <dja@axtens.net>
>
> I never got the difference between this and 'parsearchive'. Care to
> enlighten me? :)

parsemail-batch takes a directory, parsearchive takes an mbox file.

An mbox file is basically a concatenation of messages, so you can
convert between the two, but while there are tools to go from mbox to
maildir, I haven't found a tool to go the other way. You have to add a
Unix From line and getting the delimiters right is a bit
tricky/annoying.

As a notmuch user, my mail is automatically in a directory, and a
combination of notmuch search and xargs allows me to create an arbitrary
directory of mail matching a set of criteria, for example:

notmuch search --output=files tag:lkml | grep -v 'sent/cur' | xargs cp -t lkml

gets me mail I received from LKML.

So I use parsemail-batch *a lot*.

Also, with this, we could in future reduce all the shell commands to 1
single little wrappers that checks what it's name was and calls the
right management command - like 'python manage.py $(basename $0
.sh)'. We can then symlink it in to place under different names.

Given how easy shell scripts are to screw up, this would reduce our
scope for error. Texinfo does something like this too, which is where I
got the idea.

Regards,
Daniel

>
> Stephen
>
>> 
>> ---
>> 
>> This isn't that useful per se, but I'm using it to benchmark
>> the performance impact of changes look for opportunities to
>> speed up.
>> ---
>>  patchwork/bin/parsemail-batch.sh                 | 25 +++-----
>>  patchwork/management/commands/parsemail-batch.py | 77
>> ++++++++++++++++++++++++
>>  2 files changed, 85 insertions(+), 17 deletions(-)
>>  create mode 100644 patchwork/management/commands/parsemail-batch.py
>> 
>> diff --git a/patchwork/bin/parsemail-batch.sh
>> b/patchwork/bin/parsemail-batch.sh
>> index d42712ed0995..f35808e9d11b 100755
>> --- a/patchwork/bin/parsemail-batch.sh
>> +++ b/patchwork/bin/parsemail-batch.sh
>> @@ -20,25 +20,16 @@
>>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
>> 1307  USA
>>  
>>  BIN_DIR=$(dirname "$0")
>> +PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
>>  
>> -if [ $# -lt 1 ]; then
>> -    echo "usage: $0 <dir> [options]" >&2
>> -    exit 1
>> +if [ -z "$PW_PYTHON" ]; then
>> +    PW_PYTHON=python2
>>  fi
>>  
>> -mail_dir="$1"
>> -
>> -echo "dir: $mail_dir"
>> -
>> -if [ ! -d "$mail_dir" ]; then
>> -    echo "$mail_dir should be a directory"? >&2
>> -    exit 1
>> +if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
>> +    DJANGO_SETTINGS_MODULE=patchwork.settings.production
>>  fi
>>  
>> -shift
>> -
>> -find "$mail_dir" -maxdepth 1 |
>> -while read -r line; do
>> -    echo "$line"
>> -    "$BIN_DIR/parsemail.sh" "$@" < "$line"
>> -done
>> +PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTHONPA
>> TH" \
>> +    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
>> +    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch "$@"
>> diff --git a/patchwork/management/commands/parsemail-batch.py
>> b/patchwork/management/commands/parsemail-batch.py
>> new file mode 100644
>> index 000000000000..ad4dce35f595
>> --- /dev/null
>> +++ b/patchwork/management/commands/parsemail-batch.py
>> @@ -0,0 +1,77 @@
>> +# Patchwork - automated patch tracking system
>> +# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
>> +#
>> +# This file is part of the Patchwork package.
>> +#
>> +# Patchwork is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published
>> by
>> +# the Free Software Foundation; either version 2 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# Patchwork is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +
>> +import email
>> +import logging
>> +from optparse import make_option
>> +import sys
>> +import os
>> +
>> +import django
>> +from django.core.management import base
>> +from django.utils import six
>> +
>> +from patchwork.parser import parse_mail
>> +
>> +logger = logging.getLogger(__name__)
>> +
>> +
>> +class Command(base.BaseCommand):
>> +    help = 'Parse a directory of mbox files and store any
>> patches/comments found.'
>> +
>> +    if django.VERSION < (1, 8):
>> +        args = '<indir>'
>> +        option_list = base.BaseCommand.option_list + (
>> +            make_option(
>> +                '--list-id',
>> +                help='mailing list ID. If not supplied, this will be
>> '
>> +                'extracted from the mail headers.'),
>> +        )
>> +    else:
>> +        def add_arguments(self, parser):
>> +            parser.add_argument(
>> +                'indir',
>> +                type=str,
>> +                default=None,
>> +                help='input mbox directory')
>> +            parser.add_argument(
>> +                '--list-id',
>> +                help='mailing list ID. If not supplied, this will be
>> '
>> +                'extracted from the mail headers.')
>> +
>> +    def handle(self, *args, **options):
>> +        indir = args[0] if args else options['indir']
>> +
>> +        if not os.path.isdir(indir):
>> +            logger.error('%s is not a directory' % indir)
>> +            sys.exit(1)
>> +
>> +        for infile in sorted(os.listdir(indir)):
>> +            print(infile)
>> +            if six.PY3:
>> +                with open(os.path.join(indir, infile), 'rb') as
>> file_:
>> +                    mail = email.message_from_binary_file(file_)
>> +            else:
>> +                with open(os.path.join(indir, infile)) as file_:
>> +                    mail = email.message_from_file(file_)
>> +
>> +            try:
>> +                result = parse_mail(mail, options['list_id'])
>> +                if not result:
>> +                    logger.warning('Failed to parse mail')
>> +            except Exception as e:
>> +                logger.warning('Error when parsing incoming email '
>> + \
>> +                               str(e),
>> +                               extra={'mail': mail.as_string()})
Stephen Finucane Feb. 26, 2017, 3:23 p.m. UTC | #3
On Sun, 2017-02-26 at 10:56 +1100, Daniel Axtens wrote:
> Stephen Finucane <stephen@that.guru> writes:
> 
> > On Fri, 2017-02-24 at 15:28 +1100, Daniel Axtens wrote:
> > > This brings it into line with the other import commands.
> > > 
> > > It's also an order of magnitude or so faster.
> > > 
> > > Signed-off-by: Daniel Axtens <dja@axtens.net>
> > 
> > I never got the difference between this and 'parsearchive'. Care to
> > enlighten me? :)
> 
> parsemail-batch takes a directory, parsearchive takes an mbox file.

Out of curiosity, are you referring to a "maildir"? If so, see below.

> An mbox file is basically a concatenation of messages, so you can
> convert between the two, but while there are tools to go from mbox to
> maildir, I haven't found a tool to go the other way. You have to add
> a
> Unix From line and getting the delimiters right is a bit
> tricky/annoying.

I don't know about converting between the two, but it would seem the
'mailbox' library that we use for reading mbox files in 'parsearchive'
[1] also supports parsing of Maildirs [2]. I wonder if we could pass a
flag to this script instead or even detect if the target is a file
(mbox) or directory (maildir)?

[1] https://github.com/getpatchwork/patchwork/blob/07835705/patchwork/m
anagement/commands/parsearchive.py#L72
[2] https://pymotw.com/2/mailbox/#maildir

> As a notmuch user, my mail is automatically in a directory, and a
> combination of notmuch search and xargs allows me to create an
> arbitrary
> directory of mail matching a set of criteria, for example:
> 
> notmuch search --output=files tag:lkml | grep -v 'sent/cur' | xargs
> cp -t lkml
> 
> gets me mail I received from LKML.

Very nice. I wasn't aware of this tool. That might be something worth
documenting in the user manual.

> So I use parsemail-batch *a lot*.
> 
> Also, with this, we could in future reduce all the shell commands to
> 1
> single little wrappers that checks what it's name was and calls the
> right management command - like 'python manage.py $(basename $0
> .sh)'. We can then symlink it in to place under different names.

For sure. I tried to unite the parsemail and parsearchive scripts when
converting them to management commands but I couldn't find a way to
read from stdin with the 'mailbox' library [3], unlike the 'email'
library [4].

[3] https://github.com/getpatchwork/patchwork/blob/07835705/patchwork/m
anagement/commands/parsearchive.py#L66
[4] https://github.com/getpatchwork/patchwork/blob/07835705/patchwork/m
anagement/commands/parsemail.py#L61-L68

> Given how easy shell scripts are to screw up, this would reduce our
> scope for error. Texinfo does something like this too, which is where
> I
> got the idea.

Yup, I'm 100% behind doing this. I'm just making sure we're not
introducing unnecessary duplication while doing so :)

Stephen

> Regards,
> Daniel
> 
> > 
> > Stephen
> > 
> > > 
> > > ---
> > > 
> > > This isn't that useful per se, but I'm using it to benchmark
> > > the performance impact of changes look for opportunities to
> > > speed up.
> > > ---
> > >  patchwork/bin/parsemail-batch.sh                 | 25 +++-----
> > >  patchwork/management/commands/parsemail-batch.py | 77
> > > ++++++++++++++++++++++++
> > >  2 files changed, 85 insertions(+), 17 deletions(-)
> > >  create mode 100644 patchwork/management/commands/parsemail-
> > > batch.py
> > > 
> > > diff --git a/patchwork/bin/parsemail-batch.sh
> > > b/patchwork/bin/parsemail-batch.sh
> > > index d42712ed0995..f35808e9d11b 100755
> > > --- a/patchwork/bin/parsemail-batch.sh
> > > +++ b/patchwork/bin/parsemail-batch.sh
> > > @@ -20,25 +20,16 @@
> > >  # Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > > MA  02111-
> > > 1307  USA
> > >  
> > >  BIN_DIR=$(dirname "$0")
> > > +PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
> > >  
> > > -if [ $# -lt 1 ]; then
> > > -    echo "usage: $0 <dir> [options]" >&2
> > > -    exit 1
> > > +if [ -z "$PW_PYTHON" ]; then
> > > +    PW_PYTHON=python2
> > >  fi
> > >  
> > > -mail_dir="$1"
> > > -
> > > -echo "dir: $mail_dir"
> > > -
> > > -if [ ! -d "$mail_dir" ]; then
> > > -    echo "$mail_dir should be a directory"? >&2
> > > -    exit 1
> > > +if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
> > > +    DJANGO_SETTINGS_MODULE=patchwork.settings.production
> > >  fi
> > >  
> > > -shift
> > > -
> > > -find "$mail_dir" -maxdepth 1 |
> > > -while read -r line; do
> > > -    echo "$line"
> > > -    "$BIN_DIR/parsemail.sh" "$@" < "$line"
> > > -done
> > > +PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTH
> > > ONPA
> > > TH" \
> > > +    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
> > > +    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch
> > > "$@"
> > > diff --git a/patchwork/management/commands/parsemail-batch.py
> > > b/patchwork/management/commands/parsemail-batch.py
> > > new file mode 100644
> > > index 000000000000..ad4dce35f595
> > > --- /dev/null
> > > +++ b/patchwork/management/commands/parsemail-batch.py
> > > @@ -0,0 +1,77 @@
> > > +# Patchwork - automated patch tracking system
> > > +# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
> > > +#
> > > +# This file is part of the Patchwork package.
> > > +#
> > > +# Patchwork is free software; you can redistribute it and/or
> > > modify
> > > +# it under the terms of the GNU General Public License as
> > > published
> > > by
> > > +# the Free Software Foundation; either version 2 of the License,
> > > or
> > > +# (at your option) any later version.
> > > +#
> > > +# Patchwork is distributed in the hope that it will be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +
> > > +import email
> > > +import logging
> > > +from optparse import make_option
> > > +import sys
> > > +import os
> > > +
> > > +import django
> > > +from django.core.management import base
> > > +from django.utils import six
> > > +
> > > +from patchwork.parser import parse_mail
> > > +
> > > +logger = logging.getLogger(__name__)
> > > +
> > > +
> > > +class Command(base.BaseCommand):
> > > +    help = 'Parse a directory of mbox files and store any
> > > patches/comments found.'
> > > +
> > > +    if django.VERSION < (1, 8):
> > > +        args = '<indir>'
> > > +        option_list = base.BaseCommand.option_list + (
> > > +            make_option(
> > > +                '--list-id',
> > > +                help='mailing list ID. If not supplied, this
> > > will be
> > > '
> > > +                'extracted from the mail headers.'),
> > > +        )
> > > +    else:
> > > +        def add_arguments(self, parser):
> > > +            parser.add_argument(
> > > +                'indir',
> > > +                type=str,
> > > +                default=None,
> > > +                help='input mbox directory')
> > > +            parser.add_argument(
> > > +                '--list-id',
> > > +                help='mailing list ID. If not supplied, this
> > > will be
> > > '
> > > +                'extracted from the mail headers.')
> > > +
> > > +    def handle(self, *args, **options):
> > > +        indir = args[0] if args else options['indir']
> > > +
> > > +        if not os.path.isdir(indir):
> > > +            logger.error('%s is not a directory' % indir)
> > > +            sys.exit(1)
> > > +
> > > +        for infile in sorted(os.listdir(indir)):
> > > +            print(infile)
> > > +            if six.PY3:
> > > +                with open(os.path.join(indir, infile), 'rb') as
> > > file_:
> > > +                    mail = email.message_from_binary_file(file_)
> > > +            else:
> > > +                with open(os.path.join(indir, infile)) as file_:
> > > +                    mail = email.message_from_file(file_)
> > > +
> > > +            try:
> > > +                result = parse_mail(mail, options['list_id'])
> > > +                if not result:
> > > +                    logger.warning('Failed to parse mail')
> > > +            except Exception as e:
> > > +                logger.warning('Error when parsing incoming
> > > email '
> > > + \
> > > +                               str(e),
> > > +                               extra={'mail': mail.as_string()})
diff mbox

Patch

diff --git a/patchwork/bin/parsemail-batch.sh b/patchwork/bin/parsemail-batch.sh
index d42712ed0995..f35808e9d11b 100755
--- a/patchwork/bin/parsemail-batch.sh
+++ b/patchwork/bin/parsemail-batch.sh
@@ -20,25 +20,16 @@ 
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 BIN_DIR=$(dirname "$0")
+PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
 
-if [ $# -lt 1 ]; then
-    echo "usage: $0 <dir> [options]" >&2
-    exit 1
+if [ -z "$PW_PYTHON" ]; then
+    PW_PYTHON=python2
 fi
 
-mail_dir="$1"
-
-echo "dir: $mail_dir"
-
-if [ ! -d "$mail_dir" ]; then
-    echo "$mail_dir should be a directory"? >&2
-    exit 1
+if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
+    DJANGO_SETTINGS_MODULE=patchwork.settings.production
 fi
 
-shift
-
-find "$mail_dir" -maxdepth 1 |
-while read -r line; do
-    echo "$line"
-    "$BIN_DIR/parsemail.sh" "$@" < "$line"
-done
+PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTHONPATH" \
+    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
+    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch "$@"
diff --git a/patchwork/management/commands/parsemail-batch.py b/patchwork/management/commands/parsemail-batch.py
new file mode 100644
index 000000000000..ad4dce35f595
--- /dev/null
+++ b/patchwork/management/commands/parsemail-batch.py
@@ -0,0 +1,77 @@ 
+# Patchwork - automated patch tracking system
+# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+import email
+import logging
+from optparse import make_option
+import sys
+import os
+
+import django
+from django.core.management import base
+from django.utils import six
+
+from patchwork.parser import parse_mail
+
+logger = logging.getLogger(__name__)
+
+
+class Command(base.BaseCommand):
+    help = 'Parse a directory of mbox files and store any patches/comments found.'
+
+    if django.VERSION < (1, 8):
+        args = '<indir>'
+        option_list = base.BaseCommand.option_list + (
+            make_option(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.'),
+        )
+    else:
+        def add_arguments(self, parser):
+            parser.add_argument(
+                'indir',
+                type=str,
+                default=None,
+                help='input mbox directory')
+            parser.add_argument(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.')
+
+    def handle(self, *args, **options):
+        indir = args[0] if args else options['indir']
+
+        if not os.path.isdir(indir):
+            logger.error('%s is not a directory' % indir)
+            sys.exit(1)
+
+        for infile in sorted(os.listdir(indir)):
+            print(infile)
+            if six.PY3:
+                with open(os.path.join(indir, infile), 'rb') as file_:
+                    mail = email.message_from_binary_file(file_)
+            else:
+                with open(os.path.join(indir, infile)) as file_:
+                    mail = email.message_from_file(file_)
+
+            try:
+                result = parse_mail(mail, options['list_id'])
+                if not result:
+                    logger.warning('Failed to parse mail')
+            except Exception as e:
+                logger.warning('Error when parsing incoming email ' + \
+                               str(e),
+                               extra={'mail': mail.as_string()})