Patchwork [V2] Use email addresses of recipients as fallback to look up the patch's project

login
register
mail settings
Submitter Guilherme Salgado
Date April 19, 2011, 11:44 a.m.
Message ID <20110419114134.3452.12448.stgit@localhost6.localdomain6>
Download mbox | patch
Permalink /patch/91955/
State Superseded
Headers show

Comments

Guilherme Salgado - April 19, 2011, 11:44 a.m.
This is used only when the List-ID lookup doesn't return any projects *and*
the PATCHWORK_FALLBACK_TO_LISTEMAIL setting is set to True.

Signed-off-by: Guilherme Salgado <guilherme.salgado@linaro.org>
---

This second version incorporates the changes to the email regexp that Jeremy suggested.

 apps/patchwork/bin/parsemail.py     |   26 ++++++++++++++++++
 apps/patchwork/tests/patchparser.py |   50 ++++++++++++++++++++++++++++++++---
 apps/settings.py                    |    3 ++
 3 files changed, 74 insertions(+), 5 deletions(-)

Patch

diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index c36dae4..2057457 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -24,6 +24,7 @@  import re
 import datetime
 import time
 import operator
+import settings
 from email import message_from_file
 try:
     from email.header import Header, decode_header
@@ -57,7 +58,7 @@  def clean_header(header):
 
     return normalise_space(u' '.join(fragments))
 
-def find_project(mail):
+def find_project_by_listid(mail):
     project = None
     listid_res = [re.compile('.*<([^>]+)>.*', re.S),
                   re.compile('^([\S]+)$', re.S)]
@@ -83,6 +84,29 @@  def find_project(mail):
 
     return project
 
+def extract_email_addresses(str):
+    email_re = re.compile(
+        r"([\w\.\-+=]+@(?:(?:[0-9a-zA-Z-]{1,}\.)*)[a-zA-Z]{2,})")
+    return email_re.findall(str)
+
+def find_project_by_list_address(mail):
+    recipients = mail.get('To', '') + ' ' + mail.get('CC', '')
+    email_addresses = extract_email_addresses(recipients)
+    for email_address in email_addresses:
+        try:
+            return Project.objects.get(listemail = email_address)
+        except Project.DoesNotExist:
+            pass
+    print ("Unable to find a project for any of the recipients: %s" %
+           email_addresses)
+    return None
+
+def find_project(mail):
+    project = find_project_by_listid(mail)
+    if project is None and settings.PATCHWORK_FALLBACK_TO_LISTEMAIL:
+        project = find_project_by_list_address(mail)
+    return project
+
 def find_author(mail):
 
     from_header = clean_header(mail.get('From'))
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index 7013e85..d141412 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -20,6 +20,7 @@ 
 import unittest
 import os
 from email import message_from_string
+import settings
 from patchwork.models import Project, Person, Patch, Comment
 from patchwork.tests.utils import read_patch, read_mail, create_email, defaults
 
@@ -34,8 +35,9 @@  class PatchTest(unittest.TestCase):
     default_subject = defaults.subject
     project = defaults.project
 
-from patchwork.bin.parsemail import find_content, find_author, find_project, \
-                                    parse_mail
+from patchwork.bin.parsemail import (
+    extract_email_addresses, find_content, find_author, find_project,
+    parse_mail)
 
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
@@ -299,18 +301,56 @@  class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
             # and the one we parsed in setUp()
             self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
 
-class ListIdHeaderTest(unittest.TestCase):
-    """ Test that we parse List-Id headers from mails correctly """
+class EmailProjectGuessing(unittest.TestCase):
+    """Projects are guessed based on List-Id headers or recipient addresses"""
     def setUp(self):
+        self.orig_fallback_to_listemail = \
+            settings.PATCHWORK_FALLBACK_TO_LISTEMAIL
+        settings.PATCHWORK_FALLBACK_TO_LISTEMAIL = False
         self.project = Project(linkname = 'test-project-1', name = 'Project 1',
                 listid = '1.example.com', listemail='1@example.com')
         self.project.save()
 
+    def testExtractingEmailAddressesFromRecipientsList(self):
+        emails = extract_email_addresses(
+            '"Foo Bar" <foo.bar@example.com>,'
+            '<baz+list@foo.example.com>,'
+            'bar-foo@bar.foo.example.com,'
+            # Notice that this one is not a valid email address.
+            'bar-foo@.com')
+        self.assertEqual(
+            ['foo.bar@example.com',
+             'baz+list@foo.example.com',
+             'bar-foo@bar.foo.example.com'],
+            emails)
+
+    def testDoNotFallbackToEmailAddressWhenNotConfiguredTo(self):
+        self.assertFalse(settings.PATCHWORK_FALLBACK_TO_LISTEMAIL)
+        email = MIMEText('')
+        email['To'] = '"First dev list" <1@example.com>'
+        project = find_project(email)
+        self.assertEquals(None, project)
+
     def testNoListId(self):
         email = MIMEText('')
         project = find_project(email)
         self.assertEquals(project, None)
 
+    def testNoListIdWithListEmailAsRecipient(self):
+        settings.PATCHWORK_FALLBACK_TO_LISTEMAIL = True
+        email = MIMEText('')
+        email['To'] = '"First dev list" <1@example.com>'
+        project = find_project(email)
+        self.assertEquals(self.project, project)
+
+    def testNoListIdWithListEmailAsCC(self):
+        settings.PATCHWORK_FALLBACK_TO_LISTEMAIL = True
+        email = MIMEText('')
+        email['CC'] = ('"First maintainer <maintainer@example.com>, '
+                       '"First dev list" <1@example.com>')
+        project = find_project(email)
+        self.assertEquals(self.project, project)
+
     def testBlankListId(self):
         email = MIMEText('')
         email['List-Id'] = ''
@@ -344,6 +384,8 @@  class ListIdHeaderTest(unittest.TestCase):
         self.assertEquals(project, self.project)
 
     def tearDown(self):
+        settings.PATCHWORK_FALLBACK_TO_LISTEMAIL = \
+            self.orig_fallback_to_listemail
         self.project.delete()
 
 
diff --git a/apps/settings.py b/apps/settings.py
index f56da70..fd234af 100644
--- a/apps/settings.py
+++ b/apps/settings.py
@@ -101,6 +101,9 @@  INSTALLED_APPS = (
 
 DEFAULT_PATCHES_PER_PAGE = 100
 DEFAULT_FROM_EMAIL = 'Patchwork <patchwork@patchwork.example.com>'
+# If set to True, this will cause the parsemail script to look up projects
+# by email address when one cannot be found by list ID.
+PATCHWORK_FALLBACK_TO_LISTEMAIL = False
 
 ACCOUNT_ACTIVATION_DAYS = 7