diff mbox series

[6/9] parser: close a TOCTTOU bug on Person creation

Message ID 20180221141716.10908-7-dja@axtens.net
State Changes Requested
Headers show
Series Tools and fixes for parallel parsing | expand

Checks

Context Check Description
dja/snowpatch-0_1_0 success master/apply_patch Successfully applied
dja/snowpatch-snowpatch_job_snowpatch-patchwork success Test snowpatch/job/snowpatch-patchwork on branch master

Commit Message

Daniel Axtens Feb. 21, 2018, 2:17 p.m. UTC
find_author looks up a person by email, and if they do not exist,
creates a Person model, which may be saved later if the message
contains something valuable.

Multiple simultaneous processes can race here: both can do the SELECT,
find there is no Person, and create the model. One will succeed in
saving, the other will get an IntegrityError.

Reduce the window by making find_author into get_or_create_author, and
plumb that through. (Remove a test that specifically required find_author
to *not* create).

More importantly, cover the case where we lose the race, by catching the
IntegrityError and fetching the winning Person model.

Signed-off-by: Daniel Axtens <dja@axtens.net>
---
 patchwork/parser.py            | 32 ++++++++++++++++++--------------
 patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
 2 files changed, 33 insertions(+), 36 deletions(-)

Comments

Andrew Donnellan Feb. 22, 2018, 4:44 a.m. UTC | #1
On 22/02/18 01:17, Daniel Axtens wrote:
> find_author looks up a person by email, and if they do not exist,
> creates a Person model, which may be saved later if the message
> contains something valuable.
> 
> Multiple simultaneous processes can race here: both can do the SELECT,
> find there is no Person, and create the model. One will succeed in
> saving, the other will get an IntegrityError.
> 
> Reduce the window by making find_author into get_or_create_author, and
> plumb that through. (Remove a test that specifically required find_author
> to *not* create).
> 
> More importantly, cover the case where we lose the race, by catching the
> IntegrityError and fetching the winning Person model.
> 
> Signed-off-by: Daniel Axtens <dja@axtens.net>

LGTM apart from one comment below

Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>


> ---
>   patchwork/parser.py            | 32 ++++++++++++++++++--------------
>   patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
>   2 files changed, 33 insertions(+), 36 deletions(-)
> 
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index cbf88fe4e464..1f3b3dd8901f 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -29,6 +29,7 @@ import logging
>   import re
> 
>   from django.contrib.auth.models import User
> +from django.db import IntegrityError
>   from django.utils import six
> 
>   from patchwork.models import Comment
> @@ -241,7 +242,7 @@ def _find_series_by_references(project, mail):
>               continue
> 
> 
> -def _find_series_by_markers(project, mail):
> +def _find_series_by_markers(project, mail, author):
>       """Find a patch's series using series markers and sender.
> 
>       Identify suitable series for a patch using a combination of the
> @@ -262,7 +263,6 @@ def _find_series_by_markers(project, mail):
>       still won't help us if someone spams the mailing list with
>       duplicate series but that's a tricky situation for anyone to parse.
>       """
> -    author = find_author(mail)
> 
>       subject = mail.get('Subject')
>       name, prefixes = clean_subject(subject, [project.linkname])
> @@ -282,7 +282,7 @@ def _find_series_by_markers(project, mail):
>           return
> 
> 
> -def find_series(project, mail):
> +def find_series(project, mail, author):
>       """Find a series, if any, for a given patch.
> 
>       Args:
> @@ -297,10 +297,10 @@ def find_series(project, mail):
>       if series:
>           return series
> 
> -    return _find_series_by_markers(project, mail)
> +    return _find_series_by_markers(project, mail, author)
> 
> 
> -def find_author(mail):
> +def get_or_create_author(mail):
>       from_header = clean_header(mail.get('From'))
> 
>       if not from_header:
> @@ -342,11 +342,16 @@ def find_author(mail):
>           name = name.strip()[:255]
> 
>       try:
> +        person = Person.objects.get_or_create(email__iexact=email,
> +                                              defaults={'name': name,
> +                                                        'email': email})[0]
> +    except IntegrityError:
> +        # we lost the race to create the person
>           person = Person.objects.get(email__iexact=email)

get_or_create() should handle the IntegrityError in there for you.

> -        if name:  # use the latest provided name
> -            person.name = name
> -    except Person.DoesNotExist:
> -        person = Person(name=name, email=email)
> +
> +    if name:  # use the latest provided name
> +        person.name = name
> +        person.save()
> 
>       return person
> 
> @@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
>           raise ValueError("Broken 'Message-Id' header")
>       msgid = msgid[:255]
> 
> -    author = find_author(mail)
>       subject = mail.get('Subject')
>       name, prefixes = clean_subject(subject, [project.linkname])
>       is_comment = subject_check(subject)
> @@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
> 
>       if not is_comment and (diff or pull_url):  # patches or pull requests
>           # we delay the saving until we know we have a patch.
> -        author.save()
> +        author = get_or_create_author(mail)
> 
>           delegate = find_delegate_by_header(mail)
>           if not delegate and diff:
> @@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
>           # series to match against.
>           series = None
>           if n:
> -            series = find_series(project, mail)
> +            series = find_series(project, mail, author)
>           else:
>               x = n = 1
> 
> @@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
>                   is_cover_letter = True
> 
>           if is_cover_letter:
> -            author.save()
> +            author = get_or_create_author(mail)
> 
>               # we don't use 'find_series' here as a cover letter will
>               # always be the first item in a thread, thus the references
> @@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
>       if not submission:
>           return
> 
> -    author.save()
> +    author = get_or_create_author(mail)
> 
>       comment = Comment(
>           submission=submission,
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 68bcb937b273..6cfe7a484443 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -34,7 +34,7 @@ from patchwork.models import Patch
>   from patchwork.models import Person
>   from patchwork.models import State
>   from patchwork.parser import clean_subject
> -from patchwork.parser import find_author
> +from patchwork.parser import get_or_create_author
>   from patchwork.parser import find_patch_content as find_content
>   from patchwork.parser import find_project_by_header
>   from patchwork.parser import find_series
> @@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
> 
>       def _test_encoding(self, from_header, sender_name, sender_email):
>           email = self._create_email(from_header)
> -        person = find_author(email)
> +        person = get_or_create_author(email)
>           person.save()
> 
>           # ensure it was parsed correctly
> @@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
>       def test_empty(self):
>           email = self._create_email('')
>           with self.assertRaises(ValueError):
> -            find_author(email)
> +            get_or_create_author(email)
> 
>       def test_ascii_encoding(self):
>           from_header = 'example user <user@example.com>'
> @@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
> 
> 
>   class SenderCorrelationTest(TestCase):
> -    """Validate correct behavior of the find_author case.
> +    """Validate correct behavior of the get_or_create_author case.
> 
>       Relies of checking the internal state of a Django model object.
> 
> @@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
>                  'test\n'
>           return message_from_string(mail)
> 
> -    def test_non_existing_sender(self):
> -        sender = 'Non-existing Sender <nonexisting@example.com>'
> -        mail = self._create_email(sender)
> -
> -        # don't create the person - attempt to find immediately
> -        person = find_author(mail)
> -        self.assertEqual(person._state.adding, True)
> -        self.assertEqual(person.id, None)
> -
>       def test_existing_sender(self):
>           sender = 'Existing Sender <existing@example.com>'
>           mail = self._create_email(sender)
> 
>           # create the person first
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           # then attempt to parse email with the same 'From' line
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
>           mail = self._create_email(sender)
> 
>           # create the person first
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           # then attempt to parse email with a new 'From' line
>           mail = self._create_email('existing@example.com')
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
>           sender = 'Existing Sender <existing@example.com>'
>           mail = self._create_email(sender)
> 
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           mail = self._create_email(sender.upper())
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
>           email = self._create_email(msgid)
>           project = create_project()
> 
> -        self.assertIsNone(find_series(project, email))
> +        self.assertIsNone(find_series(project, email,
> +                                      get_or_create_author(email)))
> 
>       def test_first_reply(self):
>           msgid_a = make_msgid()
> @@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
>           # assume msgid_a was already handled
>           ref = create_series_reference(msgid=msgid_a)
> 
> -        series = find_series(ref.series.project, email)
> +        series = find_series(ref.series.project, email,
> +                             get_or_create_author(email))
>           self.assertEqual(series, ref.series)
> 
>       def test_nested_series(self):
> @@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
>           # ...and the "first patch" of this new series
>           msgid = make_msgid()
>           email = self._create_email(msgid, msgids)
> -        series = find_series(project, email)
> +        series = find_series(project, email, get_or_create_author(email))
> 
>           # this should link to the second series - not the first
>           self.assertEqual(len(msgids), 4 + 1)  # old series + new cover
>
Daniel Axtens Feb. 24, 2018, 2:22 p.m. UTC | #2
Hi Andrew,

>>       try:
>> +        person = Person.objects.get_or_create(email__iexact=email,
>> +                                              defaults={'name': name,
>> +                                                        'email': email})[0]
>> +    except IntegrityError:
>> +        # we lost the race to create the person
>>           person = Person.objects.get(email__iexact=email)
>
> get_or_create() should handle the IntegrityError in there for you.

Upon inspection, yes it does, although *not* - annoyingly - in
get_or_create itself, but in _create_object_from_params. Indeed, if you
just read g_o_c, you can easily convince yourself it's not
parallel-safe, even though it is. *sigh*

Anyway, you are right and I have fixed this up for v2.

Regards,
Daniel

>
>> -        if name:  # use the latest provided name
>> -            person.name = name
>> -    except Person.DoesNotExist:
>> -        person = Person(name=name, email=email)
>> +
>> +    if name:  # use the latest provided name
>> +        person.name = name
>> +        person.save()
>> 
>>       return person
>> 
>> @@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
>>           raise ValueError("Broken 'Message-Id' header")
>>       msgid = msgid[:255]
>> 
>> -    author = find_author(mail)
>>       subject = mail.get('Subject')
>>       name, prefixes = clean_subject(subject, [project.linkname])
>>       is_comment = subject_check(subject)
>> @@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
>> 
>>       if not is_comment and (diff or pull_url):  # patches or pull requests
>>           # we delay the saving until we know we have a patch.
>> -        author.save()
>> +        author = get_or_create_author(mail)
>> 
>>           delegate = find_delegate_by_header(mail)
>>           if not delegate and diff:
>> @@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
>>           # series to match against.
>>           series = None
>>           if n:
>> -            series = find_series(project, mail)
>> +            series = find_series(project, mail, author)
>>           else:
>>               x = n = 1
>> 
>> @@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
>>                   is_cover_letter = True
>> 
>>           if is_cover_letter:
>> -            author.save()
>> +            author = get_or_create_author(mail)
>> 
>>               # we don't use 'find_series' here as a cover letter will
>>               # always be the first item in a thread, thus the references
>> @@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
>>       if not submission:
>>           return
>> 
>> -    author.save()
>> +    author = get_or_create_author(mail)
>> 
>>       comment = Comment(
>>           submission=submission,
>> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
>> index 68bcb937b273..6cfe7a484443 100644
>> --- a/patchwork/tests/test_parser.py
>> +++ b/patchwork/tests/test_parser.py
>> @@ -34,7 +34,7 @@ from patchwork.models import Patch
>>   from patchwork.models import Person
>>   from patchwork.models import State
>>   from patchwork.parser import clean_subject
>> -from patchwork.parser import find_author
>> +from patchwork.parser import get_or_create_author
>>   from patchwork.parser import find_patch_content as find_content
>>   from patchwork.parser import find_project_by_header
>>   from patchwork.parser import find_series
>> @@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
>> 
>>       def _test_encoding(self, from_header, sender_name, sender_email):
>>           email = self._create_email(from_header)
>> -        person = find_author(email)
>> +        person = get_or_create_author(email)
>>           person.save()
>> 
>>           # ensure it was parsed correctly
>> @@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
>>       def test_empty(self):
>>           email = self._create_email('')
>>           with self.assertRaises(ValueError):
>> -            find_author(email)
>> +            get_or_create_author(email)
>> 
>>       def test_ascii_encoding(self):
>>           from_header = 'example user <user@example.com>'
>> @@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
>> 
>> 
>>   class SenderCorrelationTest(TestCase):
>> -    """Validate correct behavior of the find_author case.
>> +    """Validate correct behavior of the get_or_create_author case.
>> 
>>       Relies of checking the internal state of a Django model object.
>> 
>> @@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
>>                  'test\n'
>>           return message_from_string(mail)
>> 
>> -    def test_non_existing_sender(self):
>> -        sender = 'Non-existing Sender <nonexisting@example.com>'
>> -        mail = self._create_email(sender)
>> -
>> -        # don't create the person - attempt to find immediately
>> -        person = find_author(mail)
>> -        self.assertEqual(person._state.adding, True)
>> -        self.assertEqual(person.id, None)
>> -
>>       def test_existing_sender(self):
>>           sender = 'Existing Sender <existing@example.com>'
>>           mail = self._create_email(sender)
>> 
>>           # create the person first
>> -        person_a = find_author(mail)
>> +        person_a = get_or_create_author(mail)
>>           person_a.save()
>> 
>>           # then attempt to parse email with the same 'From' line
>> -        person_b = find_author(mail)
>> +        person_b = get_or_create_author(mail)
>>           self.assertEqual(person_b._state.adding, False)
>>           self.assertEqual(person_b.id, person_a.id)
>> 
>> @@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
>>           mail = self._create_email(sender)
>> 
>>           # create the person first
>> -        person_a = find_author(mail)
>> +        person_a = get_or_create_author(mail)
>>           person_a.save()
>> 
>>           # then attempt to parse email with a new 'From' line
>>           mail = self._create_email('existing@example.com')
>> -        person_b = find_author(mail)
>> +        person_b = get_or_create_author(mail)
>>           self.assertEqual(person_b._state.adding, False)
>>           self.assertEqual(person_b.id, person_a.id)
>> 
>> @@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
>>           sender = 'Existing Sender <existing@example.com>'
>>           mail = self._create_email(sender)
>> 
>> -        person_a = find_author(mail)
>> +        person_a = get_or_create_author(mail)
>>           person_a.save()
>> 
>>           mail = self._create_email(sender.upper())
>> -        person_b = find_author(mail)
>> +        person_b = get_or_create_author(mail)
>>           self.assertEqual(person_b._state.adding, False)
>>           self.assertEqual(person_b.id, person_a.id)
>> 
>> @@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
>>           email = self._create_email(msgid)
>>           project = create_project()
>> 
>> -        self.assertIsNone(find_series(project, email))
>> +        self.assertIsNone(find_series(project, email,
>> +                                      get_or_create_author(email)))
>> 
>>       def test_first_reply(self):
>>           msgid_a = make_msgid()
>> @@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
>>           # assume msgid_a was already handled
>>           ref = create_series_reference(msgid=msgid_a)
>> 
>> -        series = find_series(ref.series.project, email)
>> +        series = find_series(ref.series.project, email,
>> +                             get_or_create_author(email))
>>           self.assertEqual(series, ref.series)
>> 
>>       def test_nested_series(self):
>> @@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
>>           # ...and the "first patch" of this new series
>>           msgid = make_msgid()
>>           email = self._create_email(msgid, msgids)
>> -        series = find_series(project, email)
>> +        series = find_series(project, email, get_or_create_author(email))
>> 
>>           # this should link to the second series - not the first
>>           self.assertEqual(len(msgids), 4 + 1)  # old series + new cover
>> 
>
> -- 
> Andrew Donnellan              OzLabs, ADL Canberra
> andrew.donnellan@au1.ibm.com  IBM Australia Limited
diff mbox series

Patch

diff --git a/patchwork/parser.py b/patchwork/parser.py
index cbf88fe4e464..1f3b3dd8901f 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -29,6 +29,7 @@  import logging
 import re
 
 from django.contrib.auth.models import User
+from django.db import IntegrityError
 from django.utils import six
 
 from patchwork.models import Comment
@@ -241,7 +242,7 @@  def _find_series_by_references(project, mail):
             continue
 
 
-def _find_series_by_markers(project, mail):
+def _find_series_by_markers(project, mail, author):
     """Find a patch's series using series markers and sender.
 
     Identify suitable series for a patch using a combination of the
@@ -262,7 +263,6 @@  def _find_series_by_markers(project, mail):
     still won't help us if someone spams the mailing list with
     duplicate series but that's a tricky situation for anyone to parse.
     """
-    author = find_author(mail)
 
     subject = mail.get('Subject')
     name, prefixes = clean_subject(subject, [project.linkname])
@@ -282,7 +282,7 @@  def _find_series_by_markers(project, mail):
         return
 
 
-def find_series(project, mail):
+def find_series(project, mail, author):
     """Find a series, if any, for a given patch.
 
     Args:
@@ -297,10 +297,10 @@  def find_series(project, mail):
     if series:
         return series
 
-    return _find_series_by_markers(project, mail)
+    return _find_series_by_markers(project, mail, author)
 
 
-def find_author(mail):
+def get_or_create_author(mail):
     from_header = clean_header(mail.get('From'))
 
     if not from_header:
@@ -342,11 +342,16 @@  def find_author(mail):
         name = name.strip()[:255]
 
     try:
+        person = Person.objects.get_or_create(email__iexact=email,
+                                              defaults={'name': name,
+                                                        'email': email})[0]
+    except IntegrityError:
+        # we lost the race to create the person
         person = Person.objects.get(email__iexact=email)
-        if name:  # use the latest provided name
-            person.name = name
-    except Person.DoesNotExist:
-        person = Person(name=name, email=email)
+
+    if name:  # use the latest provided name
+        person.name = name
+        person.save()
 
     return person
 
@@ -947,7 +952,6 @@  def parse_mail(mail, list_id=None):
         raise ValueError("Broken 'Message-Id' header")
     msgid = msgid[:255]
 
-    author = find_author(mail)
     subject = mail.get('Subject')
     name, prefixes = clean_subject(subject, [project.linkname])
     is_comment = subject_check(subject)
@@ -973,7 +977,7 @@  def parse_mail(mail, list_id=None):
 
     if not is_comment and (diff or pull_url):  # patches or pull requests
         # we delay the saving until we know we have a patch.
-        author.save()
+        author = get_or_create_author(mail)
 
         delegate = find_delegate_by_header(mail)
         if not delegate and diff:
@@ -984,7 +988,7 @@  def parse_mail(mail, list_id=None):
         # series to match against.
         series = None
         if n:
-            series = find_series(project, mail)
+            series = find_series(project, mail, author)
         else:
             x = n = 1
 
@@ -1061,7 +1065,7 @@  def parse_mail(mail, list_id=None):
                 is_cover_letter = True
 
         if is_cover_letter:
-            author.save()
+            author = get_or_create_author(mail)
 
             # we don't use 'find_series' here as a cover letter will
             # always be the first item in a thread, thus the references
@@ -1109,7 +1113,7 @@  def parse_mail(mail, list_id=None):
     if not submission:
         return
 
-    author.save()
+    author = get_or_create_author(mail)
 
     comment = Comment(
         submission=submission,
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index 68bcb937b273..6cfe7a484443 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -34,7 +34,7 @@  from patchwork.models import Patch
 from patchwork.models import Person
 from patchwork.models import State
 from patchwork.parser import clean_subject
-from patchwork.parser import find_author
+from patchwork.parser import get_or_create_author
 from patchwork.parser import find_patch_content as find_content
 from patchwork.parser import find_project_by_header
 from patchwork.parser import find_series
@@ -225,7 +225,7 @@  class SenderEncodingTest(TestCase):
 
     def _test_encoding(self, from_header, sender_name, sender_email):
         email = self._create_email(from_header)
-        person = find_author(email)
+        person = get_or_create_author(email)
         person.save()
 
         # ensure it was parsed correctly
@@ -241,7 +241,7 @@  class SenderEncodingTest(TestCase):
     def test_empty(self):
         email = self._create_email('')
         with self.assertRaises(ValueError):
-            find_author(email)
+            get_or_create_author(email)
 
     def test_ascii_encoding(self):
         from_header = 'example user <user@example.com>'
@@ -269,7 +269,7 @@  class SenderEncodingTest(TestCase):
 
 
 class SenderCorrelationTest(TestCase):
-    """Validate correct behavior of the find_author case.
+    """Validate correct behavior of the get_or_create_author case.
 
     Relies of checking the internal state of a Django model object.
 
@@ -284,25 +284,16 @@  class SenderCorrelationTest(TestCase):
                'test\n'
         return message_from_string(mail)
 
-    def test_non_existing_sender(self):
-        sender = 'Non-existing Sender <nonexisting@example.com>'
-        mail = self._create_email(sender)
-
-        # don't create the person - attempt to find immediately
-        person = find_author(mail)
-        self.assertEqual(person._state.adding, True)
-        self.assertEqual(person.id, None)
-
     def test_existing_sender(self):
         sender = 'Existing Sender <existing@example.com>'
         mail = self._create_email(sender)
 
         # create the person first
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         # then attempt to parse email with the same 'From' line
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -311,12 +302,12 @@  class SenderCorrelationTest(TestCase):
         mail = self._create_email(sender)
 
         # create the person first
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         # then attempt to parse email with a new 'From' line
         mail = self._create_email('existing@example.com')
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -324,11 +315,11 @@  class SenderCorrelationTest(TestCase):
         sender = 'Existing Sender <existing@example.com>'
         mail = self._create_email(sender)
 
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         mail = self._create_email(sender.upper())
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -361,7 +352,8 @@  class SeriesCorrelationTest(TestCase):
         email = self._create_email(msgid)
         project = create_project()
 
-        self.assertIsNone(find_series(project, email))
+        self.assertIsNone(find_series(project, email,
+                                      get_or_create_author(email)))
 
     def test_first_reply(self):
         msgid_a = make_msgid()
@@ -371,7 +363,8 @@  class SeriesCorrelationTest(TestCase):
         # assume msgid_a was already handled
         ref = create_series_reference(msgid=msgid_a)
 
-        series = find_series(ref.series.project, email)
+        series = find_series(ref.series.project, email,
+                             get_or_create_author(email))
         self.assertEqual(series, ref.series)
 
     def test_nested_series(self):
@@ -395,7 +388,7 @@  class SeriesCorrelationTest(TestCase):
         # ...and the "first patch" of this new series
         msgid = make_msgid()
         email = self._create_email(msgid, msgids)
-        series = find_series(project, email)
+        series = find_series(project, email, get_or_create_author(email))
 
         # this should link to the second series - not the first
         self.assertEqual(len(msgids), 4 + 1)  # old series + new cover