From patchwork Fri May 13 07:39:45 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Guilherme Salgado X-Patchwork-Id: 95445 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from ozlabs.org (localhost [IPv6:::1]) by ozlabs.org (Postfix) with ESMTP id 2C013B6F16 for ; Fri, 13 May 2011 17:39:51 +1000 (EST) Received: from adelie.canonical.com (adelie.canonical.com [91.189.90.139]) by ozlabs.org (Postfix) with ESMTP id 0424FB6F10 for ; Fri, 13 May 2011 17:39:49 +1000 (EST) Received: from youngberry.canonical.com ([91.189.89.112]) by adelie.canonical.com with esmtp (Exim 4.71 #1 (Debian)) id 1QKmyQ-0007L1-OD; Fri, 13 May 2011 07:39:46 +0000 Received: from business-89-133-214-82.business.broadband.hu ([89.133.214.82] helo=feioso) by youngberry.canonical.com with esmtpsa (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1QKmyQ-0004rE-Ha; Fri, 13 May 2011 07:39:46 +0000 Received: from localhost6.localdomain6 (localhost.localdomain [127.0.0.1]) by feioso (Postfix) with ESMTP id 92CB34067D; Fri, 13 May 2011 09:39:45 +0200 (CEST) Subject: [PATCH V2] Prefer patch subject/author when they're specified in the body of the email To: patchwork@lists.ozlabs.org From: Guilherme Salgado Date: Fri, 13 May 2011 09:39:45 +0200 Message-ID: <20110513073846.26769.11090.stgit@localhost6.localdomain6> User-Agent: StGit/0.15 MIME-Version: 1.0 Cc: patches@linaro.org X-BeenThere: patchwork@lists.ozlabs.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: Patchwork development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org To be consistent with git-am, use the subject (extracted from the first line, when it begins with 'Subject:') and the author (extracted from the first or second line, when it begins with 'From:') and use that as Patch.name and Patch.author respectively. For more details on the exact format that is supported, check git-am's manpage. Signed-off-by: Guilherme Salgado --- apps/patchwork/bin/parsemail.py | 92 ++++++++++++++++++++++++------ apps/patchwork/models.py | 4 + apps/patchwork/tests/factory.py | 8 ++- apps/patchwork/tests/patchparser.py | 99 +++++++++++++++++++++++++++++--- lib/sql/migration/009-patch-author.sql | 5 ++ 5 files changed, 178 insertions(+), 30 deletions(-) create mode 100644 lib/sql/migration/009-patch-author.sql diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index bf05218..65ede37 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -108,11 +108,7 @@ def find_project(mail): project = find_project_by_list_address(mail) return project -def find_author(mail): - - from_header = clean_header(mail.get('From')) - (name, email) = (None, None) - +def parse_name_and_email(text): # tuple of (regex, fn) # - where fn returns a (name, email) tuple from the match groups resulting # from re.match().groups() @@ -128,20 +124,26 @@ def find_author(mail): ] for regex, fn in from_res: - match = regex.match(from_header) + match = regex.match(text) if match: - (name, email) = fn(match.groups()) - break + name, email = fn(match.groups()) + if name is not None: + name = name.strip() + if email is not None: + email = email.strip() + return name, email + return None, None + + +def find_submitter(mail): + + from_header = clean_header(mail.get('From')) + name, email = parse_name_and_email(from_header) if email is None: raise Exception("Could not parse From: header") - email = email.strip() - if name is not None: - name = name.strip() - new_person = False - try: person = Person.objects.get(email__iexact = email) except Person.DoesNotExist: @@ -228,6 +230,54 @@ def find_content(project, mail): return (patch, comment) +def find_patch_name(comment): + if not comment: + return None + first_line = comment.content.split('\n', 1)[0] + if first_line.startswith('Subject:'): + return first_line.replace('Subject:', '').strip() + return None + +def find_author(comment): + """Return a Person entry for the author specified in the given comment. + + The author of a patch is sometimes not the person who's submitting it, and + in these cases it is specified in the body of the email, either in the + first or the second line, using the following format: + + From: Somebody + + This is the format understood by 'git-am', which is described in detail in + its manpage. + """ + if not comment: + return None + lines = comment.content.split('\n') + if len(lines) == 0: + return None + # Append an extra line at the end so that our code can assume there will + # always be at least two lines, simplifying things significantly. + lines.append('') + first_line, second_line = lines[:2] + if first_line.startswith('From:'): + author_line = first_line + elif (first_line.startswith('Subject:') and + second_line.startswith('From:')): + author_line = second_line + else: + return None + + name, email = parse_name_and_email(author_line.replace('From:', '')) + if email is None: + return None + + try: + person = Person.objects.get(email__iexact=email) + except Person.DoesNotExist: + person = Person(name=name, email=email) + person.save() + return person + def find_patch_for_comment(project, mail): # construct a list of possible reply message ids refs = [] @@ -408,16 +458,22 @@ def parse_mail(mail): msgid = mail.get('Message-Id').strip() - (author, save_required) = find_author(mail) + (submitter, save_required) = find_submitter(mail) (patch, comment) = find_content(project, mail) if patch: # we delay the saving until we know we have a patch. if save_required: - author.save() + submitter.save() save_required = False - patch.submitter = author + patch.submitter = submitter + author = find_author(comment) + if author is not None: + patch.author = author + name = find_patch_name(comment) + if name is not None: + patch.name = name patch.msgid = msgid patch.project = project patch.state = get_state(mail.get('X-Patchwork-State', '').strip()) @@ -430,12 +486,12 @@ def parse_mail(mail): if comment: if save_required: - author.save() + submitter.save() # looks like the original constructor for Comment takes the pk # when the Comment is created. reset it here. if patch: comment.patch = patch - comment.submitter = author + comment.submitter = submitter comment.msgid = msgid try: comment.save() diff --git a/apps/patchwork/models.py b/apps/patchwork/models.py index 2b5d429..cfead27 100644 --- a/apps/patchwork/models.py +++ b/apps/patchwork/models.py @@ -203,6 +203,7 @@ class Patch(models.Model): name = models.CharField(max_length=255) date = models.DateTimeField(default=datetime.datetime.now) submitter = models.ForeignKey(Person) + author = models.ForeignKey(Person, related_name='author_id') delegate = models.ForeignKey(User, blank = True, null = True) state = models.ForeignKey(State) archived = models.BooleanField(default = False) @@ -224,6 +225,9 @@ class Patch(models.Model): except: self.state = State.objects.get(ordering = 0) + if self.author_id is None: + self.author = self.submitter + if self.hash is None and self.content is not None: self.hash = hash_patch(self.content).hexdigest() diff --git a/apps/patchwork/tests/factory.py b/apps/patchwork/tests/factory.py index 1eed452..1ece6ad 100644 --- a/apps/patchwork/tests/factory.py +++ b/apps/patchwork/tests/factory.py @@ -93,14 +93,16 @@ class ObjectFactory(object): bundle.save() return bundle - def makePatch(self, project = None, submitter = None, date = None, - content = None): + def makePatch(self, project = None, submitter = None, author = None, + date = None, content = None): if date is None: date = datetime.now() if project is None: project = self.makeProject() if submitter is None: submitter = self.makePerson() + if author is None: + author = submitter if content is None: content = textwrap.dedent("""\ --- a/apps/patchwork/bin/parsemail.py @@ -112,7 +114,7 @@ class ObjectFactory(object): - mail_headers(mail) """) msgid = email.utils.make_msgid(idstring = self.getUniqueString()) - patch = Patch(project = project, msgid = msgid, + patch = Patch(project = project, msgid = msgid, author = author, name = self.getUniqueString(), submitter = submitter, date = date, content = content) patch.save() diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py index d141412..59287b7 100644 --- a/apps/patchwork/tests/patchparser.py +++ b/apps/patchwork/tests/patchparser.py @@ -17,11 +17,12 @@ # along with Patchwork; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import textwrap import unittest -import os -from email import message_from_string +from email import message_from_string, utils import settings from patchwork.models import Project, Person, Patch, Comment +from patchwork.tests.factory import factory from patchwork.tests.utils import read_patch, read_mail, create_email, defaults try: @@ -36,7 +37,7 @@ class PatchTest(unittest.TestCase): project = defaults.project from patchwork.bin.parsemail import ( - extract_email_addresses, find_content, find_author, find_project, + extract_email_addresses, find_content, find_submitter, find_project, parse_mail) class InlinePatchTest(PatchTest): @@ -143,7 +144,7 @@ class SenderEncodingTest(unittest.TestCase): 'Subject: test\n\n' + \ 'test' self.email = message_from_string(mail) - (self.person, new) = find_author(self.email) + (self.person, new) = find_submitter(self.email) self.person.save() def tearDown(self): @@ -209,28 +210,28 @@ class SenderCorrelationTest(unittest.TestCase): def setUp(self): self.existing_sender_mail = self.mail(self.existing_sender) self.non_existing_sender_mail = self.mail(self.non_existing_sender) - (self.person, new) = find_author(self.existing_sender_mail) + (self.person, new) = find_submitter(self.existing_sender_mail) self.person.save() def testExisingSender(self): - (person, new) = find_author(self.existing_sender_mail) + (person, new) = find_submitter(self.existing_sender_mail) self.assertEqual(new, False) self.assertEqual(person.id, self.person.id) def testNonExisingSender(self): - (person, new) = find_author(self.non_existing_sender_mail) + (person, new) = find_submitter(self.non_existing_sender_mail) self.assertEqual(new, True) self.assertEqual(person.id, None) def testExistingDifferentFormat(self): mail = self.mail('existing@example.com') - (person, new) = find_author(mail) + (person, new) = find_submitter(mail) self.assertEqual(new, False) self.assertEqual(person.id, self.person.id) def testExistingDifferentCase(self): mail = self.mail(self.existing_sender.upper()) - (person, new) = find_author(mail) + (person, new) = find_submitter(mail) self.assertEqual(new, False) self.assertEqual(person.id, self.person.id) @@ -301,6 +302,86 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest): # and the one we parsed in setUp() self.assertEquals(Comment.objects.filter(patch = patch).count(), 2) + +class TestAuthorParsing(unittest.TestCase): + + submitter_email = 'barfoorino@example.com' + submitter = 'Bar Foorino <%s>' % submitter_email + diff = textwrap.dedent(""" + --- + diff --git a/omap_device.h b/omap_device.h + --- a/omap_device.h + +++ b/omap_device.h + @@ -50,1 +50,2 @@ extern struct device omap_device_parent; + * @hwmods: (one .. many per omap_device) + + * @set_rate: fn ptr to change the operating rate. + """) + + def test_patch_author_parsing(self): + # If the first line in the body of an email starts with 'From:' and + # contains an email address, we'll use that address to lookup the + # patch author, creating a new Person to represent it if necessary. + content = textwrap.dedent(""" + From: Foo Barino + + This patch does this and that. + """) + msgid = self.make_email_and_parse_it(content + self.diff) + patch = Patch.objects.get(msgid=msgid) + self.assertEqual(self.submitter_email, patch.submitter.email) + self.assertEqual('author@example.com', patch.author.email) + self.assertEqual('Foo Barino', patch.author.name) + + def test_patch_author_and_name_parsing(self): + # If the first line in the body of an email starts with 'Subject:', + # we'll use that as the patch name and look for an author on the next + # line (identified by a 'From:' string at the start). + content = textwrap.dedent(""" + Subject: A patch for this and that + From: Foo Barino + + This patch does this and that. + """) + msgid = self.make_email_and_parse_it(content + self.diff) + patch = Patch.objects.get(msgid=msgid) + self.assertEqual(self.submitter_email, patch.submitter.email) + self.assertEqual('A patch for this and that', patch.name) + self.assertEqual('author@example.com', patch.author.email) + + def test_patch_author_parsing_from_email_without_patch(self): + content = textwrap.dedent(""" + From: Foo Barino + + This patch does this and that but I forgot to include it here. + """) + msgid = self.make_email_and_parse_it(content) + self.assertRaises(Patch.DoesNotExist, Patch.objects.get, msgid=msgid) + + def test_patch_author_parsing_when_author_line_is_not_the_first(self): + # In this case the 'From:' line is not the first one in the email, so + # it's discarded and we use the patch submitter as its author. + content = textwrap.dedent(""" + Hey, notice that I'm submitting this on behalf of Foo Barino! + + From: Foo Barino + """) + msgid = self.make_email_and_parse_it(content + self.diff) + patch = Patch.objects.get(msgid=msgid) + self.assertEqual(self.submitter_email, patch.submitter.email) + self.assertEqual(self.submitter_email, patch.author.email) + + def make_email_and_parse_it(self, content): + project = factory.makeProject() + msgid = utils.make_msgid(idstring=factory.getUniqueString()) + email = MIMEText(content) + email['From'] = self.submitter + email['Subject'] = 'Whatever' + email['Message-Id'] = msgid + email['List-ID'] = '<' + project.listid + '>' + parse_mail(email) + return msgid + + class EmailProjectGuessing(unittest.TestCase): """Projects are guessed based on List-Id headers or recipient addresses""" def setUp(self): diff --git a/lib/sql/migration/009-patch-author.sql b/lib/sql/migration/009-patch-author.sql new file mode 100644 index 0000000..ae67244 --- /dev/null +++ b/lib/sql/migration/009-patch-author.sql @@ -0,0 +1,5 @@ +BEGIN; +ALTER TABLE patchwork_patch ADD column "author_id" integer REFERENCES "patchwork_person" ("id") DEFERRABLE INITIALLY DEFERRED; +UPDATE patchwork_patch SET author_id = submitter_id; +ALTER TABLE patchwork_patch ALTER COLUMN author_id SET NOT NULL; +COMMIT;