From patchwork Mon Nov 18 06:00:38 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [v2,1/5] Move email address parsing functions to a separate module X-Patchwork-Submitter: Doug Anderson X-Patchwork-Id: 291948 X-Patchwork-Delegate: stephen.finucane@intel.com Message-Id: <1384754442-27219-2-git-send-email-dianders@chromium.org> To: Jeremy Kerr Cc: patchwork@lists.ozlabs.org Date: Sun, 17 Nov 2013 22:00:38 -0800 From: Doug Anderson <dianders@chromium.org> List-Id: Patchwork development A future patch would like to be able to parse out an email address in a file other than parsemail.py. Create a common emailutils module to handle this. Signed-off-by: Doug Anderson <dianders@chromium.org> --- apps/patchwork/bin/parsemail.py | 54 ++--------------------- apps/patchwork/emailutils.py | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 50 deletions(-) create mode 100644 apps/patchwork/emailutils.py diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index b6eb97a..92d6bb3 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -26,13 +26,14 @@ import time import operator from email import message_from_file try: - from email.header import Header, decode_header + from email.header import Header from email.utils import parsedate_tz, mktime_tz except ImportError: # Python 2.4 compatibility - from email.Header import Header, decode_header + from email.Header import Header from email.Utils import parsedate_tz, mktime_tz +from patchwork.emailutils import clean_header, normalise_space, parse_from from patchwork.parser import parse_patch from patchwork.models import Patch, Project, Person, Comment, State, \ get_default_initial_patch_state @@ -40,23 +41,6 @@ from django.contrib.auth.models import User list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] -whitespace_re = re.compile('\s+') -def normalise_space(str): - return whitespace_re.sub(' ', str).strip() - -def 
clean_header(header): - """ Decode (possibly non-ascii) headers """ - - def decode(fragment): - (frag_str, frag_encoding) = fragment - if frag_encoding: - return frag_str.decode(frag_encoding) - return frag_str.decode() - - fragments = map(decode, decode_header(header)) - - return normalise_space(u' '.join(fragments)) - def find_project(mail): project = None listid_res = [re.compile('.*<([^>]+)>.*', re.S), @@ -84,37 +68,7 @@ def find_project(mail): return project def find_author(mail): - - from_header = clean_header(mail.get('From')) - (name, email) = (None, None) - - # tuple of (regex, fn) - # - where fn returns a (name, email) tuple from the match groups resulting - # from re.match().groups() - from_res = [ - # for "Firstname Lastname" style addresses - (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))), - - # for example@example.com (Firstname Lastname) style addresses - (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))), - - # everything else - (re.compile('(.*)'), (lambda g: (None, g[0]))), - ] - - for regex, fn in from_res: - match = regex.match(from_header) - if match: - (name, email) = fn(match.groups()) - break - - if email is None: - raise Exception("Could not parse From: header") - - email = email.strip() - if name is not None: - name = name.strip() - + name, email = parse_from(mail.get('From')) new_person = False try: diff --git a/apps/patchwork/emailutils.py b/apps/patchwork/emailutils.py new file mode 100644 index 0000000..2c906a9 --- /dev/null +++ b/apps/patchwork/emailutils.py @@ -0,0 +1,94 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2008 Jeremy Kerr +# +# This file is part of the Patchwork package. +# +# Patchwork is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Patchwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Patchwork; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +import re + +try: + from email.header import decode_header +except ImportError: + # Python 2.4 compatibility + from email.Header import decode_header + +whitespace_re = re.compile('\s+') +def normalise_space(str): + return whitespace_re.sub(' ', str).strip() + +def clean_header(header): + """ Decode (possibly non-ascii) headers """ + + def decode(fragment): + (frag_str, frag_encoding) = fragment + if frag_encoding: + return frag_str.decode(frag_encoding) + return frag_str.decode() + + fragments = map(decode, decode_header(header)) + + return normalise_space(u' '.join(fragments)) + +def parse_from(from_header): + """Parse a "From" header into a (unicode) name and email address. + + >>> parse_from("=?utf-8?b?RG/DvGc=?= Anderson <dianders@chromium.org>") + (u'Do\\xfcg Anderson', u'dianders@chromium.org') + >>> parse_from("Doug =?utf-8?b?QW5kw6lyc29u?= <dianders@chromium.org>") + (u'Doug And\\xe9rson', u'dianders@chromium.org') + >>> parse_from("=?utf-8?b?RG/DvGcgQW5kw6lyc29u?= <dianders@chromium.org>") + (u'Do\\xfcg And\\xe9rson', u'dianders@chromium.org') + >>> parse_from("Doug Anderson <dianders@chromium.org>") + (u'Doug Anderson', u'dianders@chromium.org') + + @from_header: An ASCII string containing the "From" header, possibly + encoded per RFC 2047. + @return: A tuple (name, email) where name is a unicode version of the name + and email is the email address with no name. 
+ """ + from_header = clean_header(from_header) + (name, email) = (None, None) + + # tuple of (regex, fn) + # - where fn returns a (name, email) tuple from the match groups resulting + # from re.match().groups() + from_res = [ + # for "Firstname Lastname" style addresses + (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))), + + # for example@example.com (Firstname Lastname) style addresses + (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))), + + # everything else + (re.compile('(.*)'), (lambda g: (None, g[0]))), + ] + + for regex, fn in from_res: + match = regex.match(from_header) + if match: + (name, email) = fn(match.groups()) + break + + if email is None: + raise Exception("Could not parse From: header") + + email = email.strip() + if name is not None: + name = name.strip() + + return name, email +