diff mbox series

[v2] models: Use database constraints to prevent split Series

Message ID 20191017105755.29688-1-stephen@that.guru
State Superseded
Headers show
Series [v2] models: Use database constraints to prevent split Series | expand

Commit Message

Stephen Finucane Oct. 17, 2019, 10:57 a.m. UTC
Currently, the 'SeriesReference' object has a unique constraint on the
two fields it has, 'series', which is a foreign key to 'Series', and
'msgid'. This is the wrong constraint. What we actually want to enforce
is that a patch, cover letter or comment is referenced by a single
series, or rather a single series per project the submission appears on.
As such, we should be enforcing uniqueness on the msgid and the project
that the patch, cover letter or comment belongs to.

This requires adding a new field to the object, 'project', since it's
not possible to do something like the following:

  unique_together = [('msgid', 'series__project')]

This is detailed here [1]. In addition, the migration needs a precursor
migration step to merge any broken series.

[1] https://stackoverflow.com/a/4440189/613428

Signed-off-by: Stephen Finucane <stephen@that.guru>
Closes: #241
Cc: Daniel Axtens <dja@axtens.net>
Cc: Petr Vorel <petr.vorel@gmail.com>
---
 patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
 .../0038_unique_series_references.py          | 121 +++++++
 patchwork/models.py                           |   6 +-
 patchwork/parser.py                           | 122 +++----
 patchwork/tests/test_parser.py                |   9 +-
 patchwork/tests/utils.py                      |   6 +-
 6 files changed, 496 insertions(+), 66 deletions(-)
 create mode 100644 patchwork/migrations/0037_python_3.py
 create mode 100644 patchwork/migrations/0038_unique_series_references.py

Comments

Daniel Axtens Oct. 17, 2019, 10:48 p.m. UTC | #1
Hi Stephen,

Were you aiming this for 2.2 or for 3?

I'm thinking 3 but happy to be convinced otherwise.

I'll also open a topic branch for PW3 and start posting some of my
Python 2 removal patches.

Regards,
Daniel

> Currently, the 'SeriesReference' object has a unique constraint on the
> two fields it has, 'series', which is a foreign key to 'Series', and
> 'msgid'. This is the wrong constraint. What we actually want to enforce
> is that a patch, cover letter or comment is referenced by a single
> series, or rather a single series per project the submission appears on.
> As such, we should be enforcing uniqueness on the msgid and the project
> that the patch, cover letter or comment belongs to.
>
> This requires adding a new field to the object, 'project', since it's
> not possible to do something like the following:
>
>   unique_together = [('msgid', 'series__project')]
>
> This is detailed here [1]. In addition, the migration needs a precursor
> migration step to merge any broken series.
>
> [1] https://stackoverflow.com/a/4440189/613428
>
> Signed-off-by: Stephen Finucane <stephen@that.guru>
> Closes: #241
> Cc: Daniel Axtens <dja@axtens.net>
> Cc: Petr Vorel <petr.vorel@gmail.com>
> ---
>  patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
>  .../0038_unique_series_references.py          | 121 +++++++
>  patchwork/models.py                           |   6 +-
>  patchwork/parser.py                           | 122 +++----
>  patchwork/tests/test_parser.py                |   9 +-
>  patchwork/tests/utils.py                      |   6 +-
>  6 files changed, 496 insertions(+), 66 deletions(-)
>  create mode 100644 patchwork/migrations/0037_python_3.py
>  create mode 100644 patchwork/migrations/0038_unique_series_references.py
>
> diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
> new file mode 100644
> index 00000000..416a7045
> --- /dev/null
> +++ patchwork/migrations/0037_python_3.py
> @@ -0,0 +1,298 @@
> +import datetime
> +
> +from django.conf import settings
> +from django.db import migrations, models
> +import django.db.models.deletion
> +
> +import patchwork.models
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [('patchwork', '0036_project_commit_url_format')]
> +
> +    operations = [
> +        migrations.AlterField(
> +            model_name='check',
> +            name='context',
> +            field=models.SlugField(
> +                default='default',
> +                help_text='A label to discern check from checks of other testing systems.',  # noqa
> +                max_length=255,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='description',
> +            field=models.TextField(
> +                blank=True,
> +                help_text='A brief description of the check.',
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='state',
> +            field=models.SmallIntegerField(
> +                choices=[
> +                    (0, 'pending'),
> +                    (1, 'success'),
> +                    (2, 'warning'),
> +                    (3, 'fail'),
> +                ],
> +                default=0,
> +                help_text='The state of the check.',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='target_url',
> +            field=models.URLField(
> +                blank=True,
> +                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='comment',
> +            name='submission',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='comments',
> +                related_query_name='comment',
> +                to='patchwork.Submission',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='path',
> +            field=models.CharField(
> +                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
> +                max_length=255,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='priority',
> +            field=models.IntegerField(
> +                default=0,
> +                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='user',
> +            field=models.ForeignKey(
> +                help_text='A user to delegate the patch to.',
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to=settings.AUTH_USER_MODEL,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='emailconfirmation',
> +            name='type',
> +            field=models.CharField(
> +                choices=[
> +                    ('userperson', 'User-Person association'),
> +                    ('registration', 'Registration'),
> +                    ('optout', 'Email opt-out'),
> +                ],
> +                max_length=20,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='category',
> +            field=models.CharField(
> +                choices=[
> +                    ('cover-created', 'Cover Letter Created'),
> +                    ('patch-created', 'Patch Created'),
> +                    ('patch-completed', 'Patch Completed'),
> +                    ('patch-state-changed', 'Patch State Changed'),
> +                    ('patch-delegated', 'Patch Delegate Changed'),
> +                    ('check-created', 'Check Created'),
> +                    ('series-created', 'Series Created'),
> +                    ('series-completed', 'Series Completed'),
> +                ],
> +                db_index=True,
> +                help_text='The category of the event.',
> +                max_length=20,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='cover',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The cover letter that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.CoverLetter',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='date',
> +            field=models.DateTimeField(
> +                default=datetime.datetime.utcnow,
> +                help_text='The time this event was created.',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='patch',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The patch that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Patch',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='project',
> +            field=models.ForeignKey(
> +                help_text='The project that the events belongs to.',
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Project',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='series',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The series that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Series',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='patch',
> +            name='number',
> +            field=models.PositiveSmallIntegerField(
> +                default=None,
> +                help_text='The number assigned to this patch in the series',
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='commit_url_format',
> +            field=models.CharField(
> +                blank=True,
> +                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
> +                max_length=2000,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='list_archive_url_format',
> +            field=models.CharField(
> +                blank=True,
> +                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
> +                max_length=2000,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='subject_match',
> +            field=models.CharField(
> +                blank=True,
> +                default='',
> +                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
> +                max_length=64,
> +                validators=[patchwork.models.validate_regex_compiles],
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='name',
> +            field=models.CharField(
> +                blank=True,
> +                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
> +                max_length=255,
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='total',
> +            field=models.IntegerField(
> +                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='version',
> +            field=models.IntegerField(
> +                default=1,
> +                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='seriesreference',
> +            name='series',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='references',
> +                related_query_name='reference',
> +                to='patchwork.Series',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='abbrev',
> +            field=models.CharField(
> +                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
> +                max_length=2,
> +                unique=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='pattern',
> +            field=models.CharField(
> +                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
> +                max_length=50,
> +                validators=[patchwork.models.validate_regex_compiles],
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='show_column',
> +            field=models.BooleanField(
> +                default=True,
> +                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='items_per_page',
> +            field=models.PositiveIntegerField(
> +                default=100, help_text='Number of items to display per page'
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='send_email',
> +            field=models.BooleanField(
> +                default=False,
> +                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='show_ids',
> +            field=models.BooleanField(
> +                default=False,
> +                help_text='Show click-to-copy patch IDs in the list view',
> +            ),
> +        ),
> +    ]
> diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
> new file mode 100644
> index 00000000..c5517ada
> --- /dev/null
> +++ patchwork/migrations/0038_unique_series_references.py
> @@ -0,0 +1,121 @@
> +from django.db import connection, migrations, models
> +from django.db.models import Count
> +import django.db.models.deletion
> +
> +
> +def merge_duplicate_series(apps, schema_editor):
> +    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> +    Patch = apps.get_model('patchwork', 'Patch')
> +
> +    msgid_seriesrefs = {}
> +
> +    # find all SeriesReference that share a msgid but point to different series
> +    # and decide which of the series is going to be the authoritative one
> +    msgid_counts = (
> +        SeriesReference.objects.values('msgid')
> +        .annotate(count=Count('msgid'))
> +        .filter(count__gt=1)
> +    )
> +    for msgid_count in msgid_counts:
> +        msgid = msgid_count['msgid']
> +        chosen_ref = None
> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> +            if series_ref.series.cover_letter:
> +                if chosen_ref:
> +                    # I don't think this can happen, but explode if it does
> +                    raise Exception(
> +                        "Looks like you've got two or more series that share "
> +                        "some patches but do not share a cover letter. Unable "
> +                        "to auto-resolve."
> +                    )
> +
> +                # if a series has a cover letter, that's the one we'll group
> +                # everything under
> +                chosen_ref = series_ref
> +
> +        if not chosen_ref:
> +            # if none of the series have cover letters, simply use the last
> +            # one (hint: this relies on Python's weird scoping for for loops
> +            # where 'series_ref' is still accessible outside the loop)
> +            chosen_ref = series_ref
> +
> +        msgid_seriesrefs[msgid] = chosen_ref
> +
> +    # reassign any patches referring to non-authoritative series to point to
> +    # the authoritative one, and delete the other series; we do this separately
> +    # to allow us a chance to raise the exception above if necessary
> +    for msgid, chosen_ref in msgid_seriesrefs.items():
> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> +            if series_ref == chosen_ref:
> +                continue
> +
> +            # update the patches to point to our chosen series instead, on the
> +            # assumption that all other metadata is correct
> +            for patch in Patch.objects.filter(series=series_ref.series):
> +                patch.series = chosen_ref.series
> +                patch.save()
> +
> +            # delete the other series (which will delete the series ref)
> +            series_ref.series.delete()
> +
> +
> +def copy_project_field(apps, schema_editor):
> +    if connection.vendor == 'postgresql':
> +        schema_editor.execute(
> +            """
> +            UPDATE patchwork_seriesreference
> +              SET project_id = patchwork_series.project_id
> +            FROM patchwork_series
> +              WHERE patchwork_seriesreference.series_id = patchwork_series.id
> +        """
> +        )
> +    elif connection.vendor == 'mysql':
> +        schema_editor.execute(
> +            """
> +            UPDATE patchwork_seriesreference, patchwork_series
> +              SET patchwork_seriesreference.project_id = patchwork_series.project_id
> +            WHERE patchwork_seriesreference.series_id = patchwork_series.id
> +        """
> +        )  # noqa
> +    else:
> +        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> +
> +        for series_ref in SeriesReference.objects.all().select_related(
> +            'series'
> +        ):
> +            series_ref.project = series_ref.series.project
> +            series_ref.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [('patchwork', '0037_python_3')]
> +
> +    operations = [
> +        migrations.RunPython(
> +            merge_duplicate_series, migrations.RunPython.noop, atomic=False
> +        ),
> +        migrations.AddField(
> +            model_name='seriesreference',
> +            name='project',
> +            field=models.ForeignKey(
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to='patchwork.Project',
> +            ),
> +        ),
> +        migrations.AlterUniqueTogether(
> +            name='seriesreference', unique_together={('project', 'msgid')}
> +        ),
> +        migrations.RunPython(
> +            copy_project_field, migrations.RunPython.noop, atomic=False
> +        ),
> +        migrations.AlterField(
> +            model_name='seriesreference',
> +            name='project',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to='patchwork.Project',
> +            ),
> +        ),
> +    ]
> diff --git patchwork/models.py patchwork/models.py
> index c198bc2c..e5bfecf8 100644
> --- patchwork/models.py
> +++ patchwork/models.py
> @@ -79,7 +79,8 @@ class Project(models.Model):
>      webscm_url = models.CharField(max_length=2000, blank=True)
>      list_archive_url = models.CharField(max_length=2000, blank=True)
>      list_archive_url_format = models.CharField(
> -        max_length=2000, blank=True,
> +        max_length=2000,
> +        blank=True,
>          help_text="URL format for the list archive's Message-ID redirector. "
>          "{} will be replaced by the Message-ID.")
>      commit_url_format = models.CharField(
> @@ -783,6 +784,7 @@ class SeriesReference(models.Model):
>      required to handle the case whereby one or more patches are
>      received before the cover letter.
>      """
> +    project = models.ForeignKey(Project, on_delete=models.CASCADE)
>      series = models.ForeignKey(Series, related_name='references',
>                                 related_query_name='reference',
>                                 on_delete=models.CASCADE)
> @@ -792,7 +794,7 @@ class SeriesReference(models.Model):
>          return self.msgid
>  
>      class Meta:
> -        unique_together = [('series', 'msgid')]
> +        unique_together = [('project', 'msgid')]
>  
>  
>  class Bundle(models.Model):
> diff --git patchwork/parser.py patchwork/parser.py
> index 7dc66bc0..4b837ae0 100644
> --- patchwork/parser.py
> +++ patchwork/parser.py
> @@ -16,6 +16,7 @@ import re
>  
>  from django.contrib.auth.models import User
>  from django.db.utils import IntegrityError
> +from django.db import transaction
>  from django.utils import six
>  
>  from patchwork.models import Comment
> @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail):
>      for ref in refs:
>          try:
>              return SeriesReference.objects.get(
> -                msgid=ref[:255], series__project=project).series
> +                msgid=ref[:255], project=project).series
>          except SeriesReference.DoesNotExist:
>              continue
> -        except SeriesReference.MultipleObjectsReturned:
> -            # FIXME: Open bug: this can happen when we're processing
> -            # messages in parallel. Pick the first and log.
> -            logger.error("Multiple SeriesReferences for %s in project %s!" %
> -                         (ref[:255], project.name))
> -            return SeriesReference.objects.filter(
> -                msgid=ref[:255], series__project=project).first().series
>  
>  
>  def _find_series_by_markers(project, mail, author):
> @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None):
>          except IntegrityError:
>              raise DuplicateMailError(msgid=msgid)
>  
> -        # if we don't have a series marker, we will never have an existing
> -        # series to match against.
> -        series = None
> -        if n:
> -            series = find_series(project, mail, author)
> +        for attempt in range(10):  # arbitrary retry count
> +            try:
> +                with transaction.atomic():
> +                    # if we don't have a series marker, we will never have an
> +                    # existing series to match against.
> +                    series = None
> +                    if n:
> +                        series = find_series(project, mail, author)
> +                    else:
> +                        x = n = 1
> +
> +                    # We will create a new series if:
> +                    # - there is no existing series to assign this patch to, or
> +                    # - there is an existing series, but it already has a patch
> +                    #   with this number in it
> +                    if not series or Patch.objects.filter(
> +                            series=series, number=x).count():
> +                        series = Series.objects.create(
> +                            project=project,
> +                            date=date,
> +                            submitter=author,
> +                            version=version,
> +                            total=n)
> +
> +                        # NOTE(stephenfin) We must save references for series.
> +                        # We do this to handle the case where a later patch is
> +                        # received first. Without storing references, it would
> +                        # not be possible to identify the relationship between
> +                        # patches as the earlier patch does not reference the
> +                        # later one.
> +                        for ref in refs + [msgid]:
> +                            ref = ref[:255]
> +                            # we don't want duplicates
> +                            try:
> +                                # we could have a ref to a previous series.
> +                                # (For example, a series sent in reply to
> +                                # another series.) That should not create a
> +                                # series ref for this series, so check for the
> +                                # msg-id only, not the msg-id/series pair.
> +                                SeriesReference.objects.get(
> +                                    msgid=ref, project=project)
> +                            except SeriesReference.DoesNotExist:
> +                                SeriesReference.objects.create(
> +                                    msgid=ref, project=project, series=series)
> +                    break
> +            except IntegrityError:
> +                # we lost the race so go again
> +                logger.info('Conflict while saving series. This is '
> +                            'probably because multiple patches belonging '
> +                            'to the same series have been received at '
> +                            'once. Trying again (attempt %02d/10)',
> +                            attempt)
>          else:
> -            x = n = 1
> -
> -        # We will create a new series if:
> -        # - there is no existing series to assign this patch to, or
> -        # - there is an existing series, but it already has a patch with this
> -        #   number in it
> -        if not series or Patch.objects.filter(series=series, number=x).count():
> -            series = Series.objects.create(
> -                project=project,
> -                date=date,
> -                submitter=author,
> -                version=version,
> -                total=n)
> -
> -            # NOTE(stephenfin) We must save references for series. We
> -            # do this to handle the case where a later patch is
> -            # received first. Without storing references, it would not
> -            # be possible to identify the relationship between patches
> -            # as the earlier patch does not reference the later one.
> -            for ref in refs + [msgid]:
> -                ref = ref[:255]
> -                # we don't want duplicates
> -                try:
> -                    # we could have a ref to a previous series. (For
> -                    # example, a series sent in reply to another
> -                    # series.) That should not create a series ref
> -                    # for this series, so check for the msg-id only,
> -                    # not the msg-id/series pair.
> -                    SeriesReference.objects.get(msgid=ref,
> -                                                series__project=project)
> -                except SeriesReference.DoesNotExist:
> -                    SeriesReference.objects.create(series=series, msgid=ref)
> -                except SeriesReference.MultipleObjectsReturned:
> -                    logger.error("Multiple SeriesReferences for %s"
> -                                 " in project %s!" % (ref, project.name))
> +            # we failed to save the series so return the series-less patch
> +            logger.warning('Failed to save series. Your patch has been '
> +                           'saved but this should not happen. Please '
> +                           'report this as a bug and include logs')
> +            return patch
>  
>          # add to a series if we have found one, and we have a numbered
>          # patch. Don't add unnumbered patches (for example diffs sent
> @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None):
>              # message
>              try:
>                  series = SeriesReference.objects.get(
> -                    msgid=msgid, series__project=project).series
> +                    msgid=msgid, project=project).series
>              except SeriesReference.DoesNotExist:
>                  series = None
> -            except SeriesReference.MultipleObjectsReturned:
> -                logger.error("Multiple SeriesReferences for %s"
> -                             " in project %s!" % (msgid, project.name))
> -                series = SeriesReference.objects.filter(
> -                    msgid=msgid, series__project=project).first().series
>  
>              if not series:
>                  series = Series.objects.create(
> @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None):
>                  # we don't save the in-reply-to or references fields
>                  # for a cover letter, as they can't refer to the same
>                  # series
> -                try:
> -                    SeriesReference.objects.get_or_create(series=series,
> -                                                          msgid=msgid)
> -                except SeriesReference.MultipleObjectsReturned:
> -                    logger.error("Multiple SeriesReferences for %s"
> -                                 " in project %s!" % (msgid, project.name))
> +                SeriesReference.objects.create(
> +                    msgid=msgid, project=project, series=series)
>  
>              try:
>                  cover_letter = CoverLetter.objects.create(
> diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
> index e5a2fca3..a69c64ba 100644
> --- patchwork/tests/test_parser.py
> +++ patchwork/tests/test_parser.py
> @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase):
>          msgids = [make_msgid()]
>          project = create_project()
>          series_v1 = create_series(project=project)
> -        create_series_reference(msgid=msgids[0], series=series_v1)
> +        create_series_reference(msgid=msgids[0], series=series_v1,
> +                                project=project)
>  
>          # ...and three patches
>          for i in range(3):
>              msgids.append(make_msgid())
> -            create_series_reference(msgid=msgids[-1], series=series_v1)
> +            create_series_reference(msgid=msgids[-1], series=series_v1,
> +                                    project=project)
>  
>          # now create a new series with "cover letter"
>          msgids.append(make_msgid())
>          series_v2 = create_series(project=project)
> -        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
> +        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
> +                                         project=project)
>  
>          # ...and the "first patch" of this new series
>          msgid = make_msgid()
> diff --git patchwork/tests/utils.py patchwork/tests/utils.py
> index 577183d0..4ead1781 100644
> --- patchwork/tests/utils.py
> +++ patchwork/tests/utils.py
> @@ -282,8 +282,12 @@ def create_series(**kwargs):
>  
>  def create_series_reference(**kwargs):
>      """Create 'SeriesReference' object."""
> +    project = kwargs.pop('project', create_project())
> +    series = kwargs.pop('series', create_series(project=project))
> +
>      values = {
> -        'series': create_series() if 'series' not in kwargs else None,
> +        'series': series,
> +        'project': project,
>          'msgid': make_msgid(),
>      }
>      values.update(**kwargs)
> -- 
> 2.21.0
Petr Vorel Oct. 17, 2019, 11:12 p.m. UTC | #2
Hi Stephen,

> Currently, the 'SeriesReference' object has a unique constraint on the
> two fields it has, 'series', which is a foreign key to 'Series', and
> 'msgid'. This is the wrong constraint. What we actually want to enforce
> is that a patch, cover letter or comment is referenced by a single
> series, or rather a single series per project the submission appears on.
> As such, we should be enforcing uniqueness on the msgid and the project
> that the patch, cover letter or comment belongs to.
+1.

> This requires adding a new field to the object, 'project', since it's
> not possible to do something like the following:

>   unique_together = [('msgid', 'series__project')]

> This is detailed here [1]. In addition, the migration needs a precursor
> migration step to merge any broken series.

> [1] https://stackoverflow.com/a/4440189/613428

> Signed-off-by: Stephen Finucane <stephen@that.guru>
> Closes: #241
> Cc: Daniel Axtens <dja@axtens.net>
> Cc: Petr Vorel <petr.vorel@gmail.com>

I cannot review your patch, but thanks a lot for implementing fix for #241.

Something related (I should create GH issue): it'd be great if cover letter was
always displayed (no matter how, but with all replies). ATM it's displayed as a
separate patch, if there is diff included: series [1], separated cover letter
[2] (there is no series).

Kind regards,
Petr

[1] https://patchwork.ozlabs.org/project/ltp/list/?series=135548&state=*
[2] https://patchwork.ozlabs.org/patch/1175082/
Stephen Finucane Oct. 18, 2019, 5:28 a.m. UTC | #3
On Fri, 2019-10-18 at 09:48 +1100, Daniel Axtens wrote:
> Hi Stephen,
> 
> Were you aiming this for 2.2 or for 3?
> 
> I'm thinking 3 but happy to be convinced otherwise.

I'd personally like this to be a blocker for v2.2, though the Python 3
part of the migration can possibly be dropped (I can respin if
necessary). This is a pretty dumb issue and the fix is pretty trivial,
all in all, but it's not backportable (model changes). I don't really
want to have broken series keep popping up for the next 12 months (or
however long it takes for us to wrap up a v3.0).

Let me know if there's anything I can do to help move it along.

> I'll also open a topic branch for PW3 and start posting some of my
> Python 2 removal patches.

I've a question/comment on this but I'll reply on the other thread.

Stephen

> Regards,
> Daniel
> 
> > Currently, the 'SeriesReference' object has a unique constraint on the
> > two fields it has, 'series', which is a foreign key to 'Series', and
> > 'msgid'. This is the wrong constraint. What we actually want to enforce
> > is that a patch, cover letter or comment is referenced by a single
> > series, or rather a single series per project the submission appears on.
> > As such, we should be enforcing uniqueness on the msgid and the project
> > that the patch, cover letter or comment belongs to.
> > 
> > This requires adding a new field to the object, 'project', since it's
> > not possible to do something like the following:
> > 
> >   unique_together = [('msgid', 'series__project')]
> > 
> > This is detailed here [1]. In addition, the migration needs a precursor
> > migration step to merge any broken series.
> > 
> > [1] https://stackoverflow.com/a/4440189/613428
> > 
> > Signed-off-by: Stephen Finucane <stephen@that.guru>
> > Closes: #241
> > Cc: Daniel Axtens <dja@axtens.net>
> > Cc: Petr Vorel <petr.vorel@gmail.com>
> > ---
> >  patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
> >  .../0038_unique_series_references.py          | 121 +++++++
> >  patchwork/models.py                           |   6 +-
> >  patchwork/parser.py                           | 122 +++----
> >  patchwork/tests/test_parser.py                |   9 +-
> >  patchwork/tests/utils.py                      |   6 +-
> >  6 files changed, 496 insertions(+), 66 deletions(-)
> >  create mode 100644 patchwork/migrations/0037_python_3.py
> >  create mode 100644 patchwork/migrations/0038_unique_series_references.py
> > 
> > diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
> > new file mode 100644
> > index 00000000..416a7045
> > --- /dev/null
> > +++ patchwork/migrations/0037_python_3.py
> > @@ -0,0 +1,298 @@
> > +import datetime
> > +
> > +from django.conf import settings
> > +from django.db import migrations, models
> > +import django.db.models.deletion
> > +
> > +import patchwork.models
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [('patchwork', '0036_project_commit_url_format')]
> > +
> > +    operations = [
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='context',
> > +            field=models.SlugField(
> > +                default='default',
> > +                help_text='A label to discern check from checks of other testing systems.',  # noqa
> > +                max_length=255,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='description',
> > +            field=models.TextField(
> > +                blank=True,
> > +                help_text='A brief description of the check.',
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='state',
> > +            field=models.SmallIntegerField(
> > +                choices=[
> > +                    (0, 'pending'),
> > +                    (1, 'success'),
> > +                    (2, 'warning'),
> > +                    (3, 'fail'),
> > +                ],
> > +                default=0,
> > +                help_text='The state of the check.',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='target_url',
> > +            field=models.URLField(
> > +                blank=True,
> > +                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='comment',
> > +            name='submission',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='comments',
> > +                related_query_name='comment',
> > +                to='patchwork.Submission',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='path',
> > +            field=models.CharField(
> > +                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
> > +                max_length=255,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='priority',
> > +            field=models.IntegerField(
> > +                default=0,
> > +                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='user',
> > +            field=models.ForeignKey(
> > +                help_text='A user to delegate the patch to.',
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to=settings.AUTH_USER_MODEL,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='emailconfirmation',
> > +            name='type',
> > +            field=models.CharField(
> > +                choices=[
> > +                    ('userperson', 'User-Person association'),
> > +                    ('registration', 'Registration'),
> > +                    ('optout', 'Email opt-out'),
> > +                ],
> > +                max_length=20,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='category',
> > +            field=models.CharField(
> > +                choices=[
> > +                    ('cover-created', 'Cover Letter Created'),
> > +                    ('patch-created', 'Patch Created'),
> > +                    ('patch-completed', 'Patch Completed'),
> > +                    ('patch-state-changed', 'Patch State Changed'),
> > +                    ('patch-delegated', 'Patch Delegate Changed'),
> > +                    ('check-created', 'Check Created'),
> > +                    ('series-created', 'Series Created'),
> > +                    ('series-completed', 'Series Completed'),
> > +                ],
> > +                db_index=True,
> > +                help_text='The category of the event.',
> > +                max_length=20,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='cover',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The cover letter that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.CoverLetter',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='date',
> > +            field=models.DateTimeField(
> > +                default=datetime.datetime.utcnow,
> > +                help_text='The time this event was created.',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='patch',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The patch that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Patch',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                help_text='The project that the events belongs to.',
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='series',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The series that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Series',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='patch',
> > +            name='number',
> > +            field=models.PositiveSmallIntegerField(
> > +                default=None,
> > +                help_text='The number assigned to this patch in the series',
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='commit_url_format',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
> > +                max_length=2000,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='list_archive_url_format',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
> > +                max_length=2000,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='subject_match',
> > +            field=models.CharField(
> > +                blank=True,
> > +                default='',
> > +                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
> > +                max_length=64,
> > +                validators=[patchwork.models.validate_regex_compiles],
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='name',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
> > +                max_length=255,
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='total',
> > +            field=models.IntegerField(
> > +                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='version',
> > +            field=models.IntegerField(
> > +                default=1,
> > +                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='seriesreference',
> > +            name='series',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='references',
> > +                related_query_name='reference',
> > +                to='patchwork.Series',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='abbrev',
> > +            field=models.CharField(
> > +                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
> > +                max_length=2,
> > +                unique=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='pattern',
> > +            field=models.CharField(
> > +                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
> > +                max_length=50,
> > +                validators=[patchwork.models.validate_regex_compiles],
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='show_column',
> > +            field=models.BooleanField(
> > +                default=True,
> > +                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='items_per_page',
> > +            field=models.PositiveIntegerField(
> > +                default=100, help_text='Number of items to display per page'
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='send_email',
> > +            field=models.BooleanField(
> > +                default=False,
> > +                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='show_ids',
> > +            field=models.BooleanField(
> > +                default=False,
> > +                help_text='Show click-to-copy patch IDs in the list view',
> > +            ),
> > +        ),
> > +    ]
> > diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
> > new file mode 100644
> > index 00000000..c5517ada
> > --- /dev/null
> > +++ patchwork/migrations/0038_unique_series_references.py
> > @@ -0,0 +1,121 @@
> > +from django.db import connection, migrations, models
> > +from django.db.models import Count
> > +import django.db.models.deletion
> > +
> > +
> > +def merge_duplicate_series(apps, schema_editor):
> > +    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> > +    Patch = apps.get_model('patchwork', 'Patch')
> > +
> > +    msgid_seriesrefs = {}
> > +
> > +    # find all SeriesReference that share a msgid but point to different series
> > +    # and decide which of the series is going to be the authoritative one
> > +    msgid_counts = (
> > +        SeriesReference.objects.values('msgid')
> > +        .annotate(count=Count('msgid'))
> > +        .filter(count__gt=1)
> > +    )
> > +    for msgid_count in msgid_counts:
> > +        msgid = msgid_count['msgid']
> > +        chosen_ref = None
> > +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> > +            if series_ref.series.cover_letter:
> > +                if chosen_ref:
> > +                    # I don't think this can happen, but explode if it does
> > +                    raise Exception(
> > +                        "Looks like you've got two or more series that share "
> > +                        "some patches but do not share a cover letter. Unable "
> > +                        "to auto-resolve."
> > +                    )
> > +
> > +                # if a series has a cover letter, that's the one we'll group
> > +                # everything under
> > +                chosen_ref = series_ref
> > +
> > +        if not chosen_ref:
> > +            # if none of the series have cover letters, simply use the last
> > +            # one (hint: this relies on Python's weird scoping for for loops
> > +            # where 'series_ref' is still accessible outside the loop)
> > +            chosen_ref = series_ref
> > +
> > +        msgid_seriesrefs[msgid] = chosen_ref
> > +
> > +    # reassign any patches referring to non-authoritative series to point to
> > +    # the authoritative one, and delete the other series; we do this separately
> > +    # to allow us a chance to raise the exception above if necessary
> > +    for msgid, chosen_ref in msgid_seriesrefs.items():
> > +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> > +            if series_ref == chosen_ref:
> > +                continue
> > +
> > +            # update the patches to point to our chosen series instead, on the
> > +            # assumption that all other metadata is correct
> > +            for patch in Patch.objects.filter(series=series_ref.series):
> > +                patch.series = chosen_ref.series
> > +                patch.save()
> > +
> > +            # delete the other series (which will delete the series ref)
> > +            series_ref.series.delete()
> > +
> > +
> > +def copy_project_field(apps, schema_editor):
> > +    if connection.vendor == 'postgresql':
> > +        schema_editor.execute(
> > +            """
> > +            UPDATE patchwork_seriesreference
> > +              SET project_id = patchwork_series.project_id
> > +            FROM patchwork_series
> > +              WHERE patchwork_seriesreference.series_id = patchwork_series.id
> > +        """
> > +        )
> > +    elif connection.vendor == 'mysql':
> > +        schema_editor.execute(
> > +            """
> > +            UPDATE patchwork_seriesreference, patchwork_series
> > +              SET patchwork_seriesreference.project_id = patchwork_series.project_id
> > +            WHERE patchwork_seriesreference.series_id = patchwork_series.id
> > +        """
> > +        )  # noqa
> > +    else:
> > +        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> > +
> > +        for series_ref in SeriesReference.objects.all().select_related(
> > +            'series'
> > +        ):
> > +            series_ref.project = series_ref.series.project
> > +            series_ref.save()
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [('patchwork', '0037_python_3')]
> > +
> > +    operations = [
> > +        migrations.RunPython(
> > +            merge_duplicate_series, migrations.RunPython.noop, atomic=False
> > +        ),
> > +        migrations.AddField(
> > +            model_name='seriesreference',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +        migrations.AlterUniqueTogether(
> > +            name='seriesreference', unique_together={('project', 'msgid')}
> > +        ),
> > +        migrations.RunPython(
> > +            copy_project_field, migrations.RunPython.noop, atomic=False
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='seriesreference',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +    ]
> > diff --git patchwork/models.py patchwork/models.py
> > index c198bc2c..e5bfecf8 100644
> > --- patchwork/models.py
> > +++ patchwork/models.py
> > @@ -79,7 +79,8 @@ class Project(models.Model):
> >      webscm_url = models.CharField(max_length=2000, blank=True)
> >      list_archive_url = models.CharField(max_length=2000, blank=True)
> >      list_archive_url_format = models.CharField(
> > -        max_length=2000, blank=True,
> > +        max_length=2000,
> > +        blank=True,
> >          help_text="URL format for the list archive's Message-ID redirector. "
> >          "{} will be replaced by the Message-ID.")
> >      commit_url_format = models.CharField(
> > @@ -783,6 +784,7 @@ class SeriesReference(models.Model):
> >      required to handle the case whereby one or more patches are
> >      received before the cover letter.
> >      """
> > +    project = models.ForeignKey(Project, on_delete=models.CASCADE)
> >      series = models.ForeignKey(Series, related_name='references',
> >                                 related_query_name='reference',
> >                                 on_delete=models.CASCADE)
> > @@ -792,7 +794,7 @@ class SeriesReference(models.Model):
> >          return self.msgid
> >  
> >      class Meta:
> > -        unique_together = [('series', 'msgid')]
> > +        unique_together = [('project', 'msgid')]
> >  
> >  
> >  class Bundle(models.Model):
> > diff --git patchwork/parser.py patchwork/parser.py
> > index 7dc66bc0..4b837ae0 100644
> > --- patchwork/parser.py
> > +++ patchwork/parser.py
> > @@ -16,6 +16,7 @@ import re
> >  
> >  from django.contrib.auth.models import User
> >  from django.db.utils import IntegrityError
> > +from django.db import transaction
> >  from django.utils import six
> >  
> >  from patchwork.models import Comment
> > @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail):
> >      for ref in refs:
> >          try:
> >              return SeriesReference.objects.get(
> > -                msgid=ref[:255], series__project=project).series
> > +                msgid=ref[:255], project=project).series
> >          except SeriesReference.DoesNotExist:
> >              continue
> > -        except SeriesReference.MultipleObjectsReturned:
> > -            # FIXME: Open bug: this can happen when we're processing
> > -            # messages in parallel. Pick the first and log.
> > -            logger.error("Multiple SeriesReferences for %s in project %s!" %
> > -                         (ref[:255], project.name))
> > -            return SeriesReference.objects.filter(
> > -                msgid=ref[:255], series__project=project).first().series
> >  
> >  
> >  def _find_series_by_markers(project, mail, author):
> > @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None):
> >          except IntegrityError:
> >              raise DuplicateMailError(msgid=msgid)
> >  
> > -        # if we don't have a series marker, we will never have an existing
> > -        # series to match against.
> > -        series = None
> > -        if n:
> > -            series = find_series(project, mail, author)
> > +        for attempt in range(10):  # arbitrary retry count
> > +            try:
> > +                with transaction.atomic():
> > +                    # if we don't have a series marker, we will never have an
> > +                    # existing series to match against.
> > +                    series = None
> > +                    if n:
> > +                        series = find_series(project, mail, author)
> > +                    else:
> > +                        x = n = 1
> > +
> > +                    # We will create a new series if:
> > +                    # - there is no existing series to assign this patch to, or
> > +                    # - there is an existing series, but it already has a patch
> > +                    #   with this number in it
> > +                    if not series or Patch.objects.filter(
> > +                            series=series, number=x).count():
> > +                        series = Series.objects.create(
> > +                            project=project,
> > +                            date=date,
> > +                            submitter=author,
> > +                            version=version,
> > +                            total=n)
> > +
> > +                        # NOTE(stephenfin) We must save references for series.
> > +                        # We do this to handle the case where a later patch is
> > +                        # received first. Without storing references, it would
> > +                        # not be possible to identify the relationship between
> > +                        # patches as the earlier patch does not reference the
> > +                        # later one.
> > +                        for ref in refs + [msgid]:
> > +                            ref = ref[:255]
> > +                            # we don't want duplicates
> > +                            try:
> > +                                # we could have a ref to a previous series.
> > +                                # (For example, a series sent in reply to
> > +                                # another series.) That should not create a
> > +                                # series ref for this series, so check for the
> > +                                # msg-id only, not the msg-id/series pair.
> > +                                SeriesReference.objects.get(
> > +                                    msgid=ref, project=project)
> > +                            except SeriesReference.DoesNotExist:
> > +                                SeriesReference.objects.create(
> > +                                    msgid=ref, project=project, series=series)
> > +                    break
> > +            except IntegrityError:
> > +                # we lost the race so go again
> > +                logger.info('Conflict while saving series. This is '
> > +                            'probably because multiple patches belonging '
> > +                            'to the same series have been received at '
> > +                            'once. Trying again (attempt %02d/10)',
> > +                            attempt)
> >          else:
> > -            x = n = 1
> > -
> > -        # We will create a new series if:
> > -        # - there is no existing series to assign this patch to, or
> > -        # - there is an existing series, but it already has a patch with this
> > -        #   number in it
> > -        if not series or Patch.objects.filter(series=series, number=x).count():
> > -            series = Series.objects.create(
> > -                project=project,
> > -                date=date,
> > -                submitter=author,
> > -                version=version,
> > -                total=n)
> > -
> > -            # NOTE(stephenfin) We must save references for series. We
> > -            # do this to handle the case where a later patch is
> > -            # received first. Without storing references, it would not
> > -            # be possible to identify the relationship between patches
> > -            # as the earlier patch does not reference the later one.
> > -            for ref in refs + [msgid]:
> > -                ref = ref[:255]
> > -                # we don't want duplicates
> > -                try:
> > -                    # we could have a ref to a previous series. (For
> > -                    # example, a series sent in reply to another
> > -                    # series.) That should not create a series ref
> > -                    # for this series, so check for the msg-id only,
> > -                    # not the msg-id/series pair.
> > -                    SeriesReference.objects.get(msgid=ref,
> > -                                                series__project=project)
> > -                except SeriesReference.DoesNotExist:
> > -                    SeriesReference.objects.create(series=series, msgid=ref)
> > -                except SeriesReference.MultipleObjectsReturned:
> > -                    logger.error("Multiple SeriesReferences for %s"
> > -                                 " in project %s!" % (ref, project.name))
> > +            # we failed to save the series so return the series-less patch
> > +            logger.warning('Failed to save series. Your patch has been '
> > +                           'saved but this should not happen. Please '
> > +                           'report this as a bug and include logs')
> > +            return patch
> >  
> >          # add to a series if we have found one, and we have a numbered
> >          # patch. Don't add unnumbered patches (for example diffs sent
> > @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None):
> >              # message
> >              try:
> >                  series = SeriesReference.objects.get(
> > -                    msgid=msgid, series__project=project).series
> > +                    msgid=msgid, project=project).series
> >              except SeriesReference.DoesNotExist:
> >                  series = None
> > -            except SeriesReference.MultipleObjectsReturned:
> > -                logger.error("Multiple SeriesReferences for %s"
> > -                             " in project %s!" % (msgid, project.name))
> > -                series = SeriesReference.objects.filter(
> > -                    msgid=msgid, series__project=project).first().series
> >  
> >              if not series:
> >                  series = Series.objects.create(
> > @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None):
> >                  # we don't save the in-reply-to or references fields
> >                  # for a cover letter, as they can't refer to the same
> >                  # series
> > -                try:
> > -                    SeriesReference.objects.get_or_create(series=series,
> > -                                                          msgid=msgid)
> > -                except SeriesReference.MultipleObjectsReturned:
> > -                    logger.error("Multiple SeriesReferences for %s"
> > -                                 " in project %s!" % (msgid, project.name))
> > +                SeriesReference.objects.create(
> > +                    msgid=msgid, project=project, series=series)
> >  
> >              try:
> >                  cover_letter = CoverLetter.objects.create(
> > diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
> > index e5a2fca3..a69c64ba 100644
> > --- patchwork/tests/test_parser.py
> > +++ patchwork/tests/test_parser.py
> > @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase):
> >          msgids = [make_msgid()]
> >          project = create_project()
> >          series_v1 = create_series(project=project)
> > -        create_series_reference(msgid=msgids[0], series=series_v1)
> > +        create_series_reference(msgid=msgids[0], series=series_v1,
> > +                                project=project)
> >  
> >          # ...and three patches
> >          for i in range(3):
> >              msgids.append(make_msgid())
> > -            create_series_reference(msgid=msgids[-1], series=series_v1)
> > +            create_series_reference(msgid=msgids[-1], series=series_v1,
> > +                                    project=project)
> >  
> >          # now create a new series with "cover letter"
> >          msgids.append(make_msgid())
> >          series_v2 = create_series(project=project)
> > -        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
> > +        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
> > +                                         project=project)
> >  
> >          # ...and the "first patch" of this new series
> >          msgid = make_msgid()
> > diff --git patchwork/tests/utils.py patchwork/tests/utils.py
> > index 577183d0..4ead1781 100644
> > --- patchwork/tests/utils.py
> > +++ patchwork/tests/utils.py
> > @@ -282,8 +282,12 @@ def create_series(**kwargs):
> >  
> >  def create_series_reference(**kwargs):
> >      """Create 'SeriesReference' object."""
> > +    project = kwargs.pop('project', create_project())
> > +    series = kwargs.pop('series', create_series(project=project))
> > +
> >      values = {
> > -        'series': create_series() if 'series' not in kwargs else None,
> > +        'series': series,
> > +        'project': project,
> >          'msgid': make_msgid(),
> >      }
> >      values.update(**kwargs)
> > -- 
> > 2.21.0
Daniel Axtens Oct. 18, 2019, 1:57 p.m. UTC | #4
> Closes: #241

I'm not sure this is quite correct, sadly. Using the parallel parser I
posted, I see a difference in the number of series created by parsing in
parallel and parsing serially.

# serial (-j1)
>>> Series.objects.count()
28016

# parallel (-j6)
>>> Series.objects.count()
28065


I investigated just the first Untitled series from the parallel case,
where I see that one 6-patch series has been split up a lot:

>>> pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=42)[0:6]])
[('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
  'CPUs',
  <Series: [1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD CPUs>),
 ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
  <Series: Untitled series #307>),
 ('[3/6] x86: move nop declarations into separate include file',
  <Series: Untitled series #544>),
 ('[4/6] x86: introduce rdtsc_barrier()', <Series: Untitled series #307>),
 ('[5/6] x86: remove get_cycles_sync', <Series: Untitled series #559>),
 ('[6/6] x86: read_tsc sync', <Series: Untitled series #449>)]

If I do serial parsing, this does not get split up:

>>> pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=74)[0:6]])
[('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
  'CPUs',
  <Series: Get rid of CPUID from gettimeofday>),
 ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
  <Series: Get rid of CPUID from gettimeofday>),
 ('[3/6] x86: move nop declarations into separate include file',
  <Series: Get rid of CPUID from gettimeofday>),
 ('[4/6] x86: introduce rdtsc_barrier()',
  <Series: Get rid of CPUID from gettimeofday>),
 ('[5/6] x86: remove get_cycles_sync',
  <Series: Get rid of CPUID from gettimeofday>),
 ('[6/6] x86: read_tsc sync', <Series: Get rid of CPUID from gettimeofday>)]

I haven't looked at the series in any detail beyond this - it might
still be worth including, but it just doesn't quite squash the
bug, sadly.

Regards,
Daniel

> Cc: Daniel Axtens <dja@axtens.net>
> Cc: Petr Vorel <petr.vorel@gmail.com>
> ---
>  patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
>  .../0038_unique_series_references.py          | 121 +++++++
>  patchwork/models.py                           |   6 +-
>  patchwork/parser.py                           | 122 +++----
>  patchwork/tests/test_parser.py                |   9 +-
>  patchwork/tests/utils.py                      |   6 +-
>  6 files changed, 496 insertions(+), 66 deletions(-)
>  create mode 100644 patchwork/migrations/0037_python_3.py
>  create mode 100644 patchwork/migrations/0038_unique_series_references.py
>
> diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
> new file mode 100644
> index 00000000..416a7045
> --- /dev/null
> +++ patchwork/migrations/0037_python_3.py
> @@ -0,0 +1,298 @@
> +import datetime
> +
> +from django.conf import settings
> +from django.db import migrations, models
> +import django.db.models.deletion
> +
> +import patchwork.models
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [('patchwork', '0036_project_commit_url_format')]
> +
> +    operations = [
> +        migrations.AlterField(
> +            model_name='check',
> +            name='context',
> +            field=models.SlugField(
> +                default='default',
> +                help_text='A label to discern check from checks of other testing systems.',  # noqa
> +                max_length=255,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='description',
> +            field=models.TextField(
> +                blank=True,
> +                help_text='A brief description of the check.',
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='state',
> +            field=models.SmallIntegerField(
> +                choices=[
> +                    (0, 'pending'),
> +                    (1, 'success'),
> +                    (2, 'warning'),
> +                    (3, 'fail'),
> +                ],
> +                default=0,
> +                help_text='The state of the check.',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='check',
> +            name='target_url',
> +            field=models.URLField(
> +                blank=True,
> +                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='comment',
> +            name='submission',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='comments',
> +                related_query_name='comment',
> +                to='patchwork.Submission',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='path',
> +            field=models.CharField(
> +                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
> +                max_length=255,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='priority',
> +            field=models.IntegerField(
> +                default=0,
> +                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='delegationrule',
> +            name='user',
> +            field=models.ForeignKey(
> +                help_text='A user to delegate the patch to.',
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to=settings.AUTH_USER_MODEL,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='emailconfirmation',
> +            name='type',
> +            field=models.CharField(
> +                choices=[
> +                    ('userperson', 'User-Person association'),
> +                    ('registration', 'Registration'),
> +                    ('optout', 'Email opt-out'),
> +                ],
> +                max_length=20,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='category',
> +            field=models.CharField(
> +                choices=[
> +                    ('cover-created', 'Cover Letter Created'),
> +                    ('patch-created', 'Patch Created'),
> +                    ('patch-completed', 'Patch Completed'),
> +                    ('patch-state-changed', 'Patch State Changed'),
> +                    ('patch-delegated', 'Patch Delegate Changed'),
> +                    ('check-created', 'Check Created'),
> +                    ('series-created', 'Series Created'),
> +                    ('series-completed', 'Series Completed'),
> +                ],
> +                db_index=True,
> +                help_text='The category of the event.',
> +                max_length=20,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='cover',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The cover letter that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.CoverLetter',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='date',
> +            field=models.DateTimeField(
> +                default=datetime.datetime.utcnow,
> +                help_text='The time this event was created.',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='patch',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The patch that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Patch',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='project',
> +            field=models.ForeignKey(
> +                help_text='The project that the events belongs to.',
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Project',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='event',
> +            name='series',
> +            field=models.ForeignKey(
> +                blank=True,
> +                help_text='The series that this event was created for.',
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='+',
> +                to='patchwork.Series',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='patch',
> +            name='number',
> +            field=models.PositiveSmallIntegerField(
> +                default=None,
> +                help_text='The number assigned to this patch in the series',
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='commit_url_format',
> +            field=models.CharField(
> +                blank=True,
> +                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
> +                max_length=2000,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='list_archive_url_format',
> +            field=models.CharField(
> +                blank=True,
> +                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
> +                max_length=2000,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='project',
> +            name='subject_match',
> +            field=models.CharField(
> +                blank=True,
> +                default='',
> +                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
> +                max_length=64,
> +                validators=[patchwork.models.validate_regex_compiles],
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='name',
> +            field=models.CharField(
> +                blank=True,
> +                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
> +                max_length=255,
> +                null=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='total',
> +            field=models.IntegerField(
> +                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='series',
> +            name='version',
> +            field=models.IntegerField(
> +                default=1,
> +                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='seriesreference',
> +            name='series',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                related_name='references',
> +                related_query_name='reference',
> +                to='patchwork.Series',
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='abbrev',
> +            field=models.CharField(
> +                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
> +                max_length=2,
> +                unique=True,
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='pattern',
> +            field=models.CharField(
> +                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
> +                max_length=50,
> +                validators=[patchwork.models.validate_regex_compiles],
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='tag',
> +            name='show_column',
> +            field=models.BooleanField(
> +                default=True,
> +                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='items_per_page',
> +            field=models.PositiveIntegerField(
> +                default=100, help_text='Number of items to display per page'
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='send_email',
> +            field=models.BooleanField(
> +                default=False,
> +                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
> +            ),
> +        ),
> +        migrations.AlterField(
> +            model_name='userprofile',
> +            name='show_ids',
> +            field=models.BooleanField(
> +                default=False,
> +                help_text='Show click-to-copy patch IDs in the list view',
> +            ),
> +        ),
> +    ]
> diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
> new file mode 100644
> index 00000000..c5517ada
> --- /dev/null
> +++ patchwork/migrations/0038_unique_series_references.py
> @@ -0,0 +1,121 @@
> +from django.db import connection, migrations, models
> +from django.db.models import Count
> +import django.db.models.deletion
> +
> +
> +def merge_duplicate_series(apps, schema_editor):
> +    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> +    Patch = apps.get_model('patchwork', 'Patch')
> +
> +    msgid_seriesrefs = {}
> +
> +    # find all SeriesReference that share a msgid but point to different series
> +    # and decide which of the series is going to be the authoritative one
> +    msgid_counts = (
> +        SeriesReference.objects.values('msgid')
> +        .annotate(count=Count('msgid'))
> +        .filter(count__gt=1)
> +    )
> +    for msgid_count in msgid_counts:
> +        msgid = msgid_count['msgid']
> +        chosen_ref = None
> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> +            if series_ref.series.cover_letter:
> +                if chosen_ref:
> +                    # I don't think this can happen, but explode if it does
> +                    raise Exception(
> +                        "Looks like you've got two or more series that share "
> +                        "some patches but do not share a cover letter. Unable "
> +                        "to auto-resolve."
> +                    )
> +
> +                # if a series has a cover letter, that's the one we'll group
> +                # everything under
> +                chosen_ref = series_ref
> +
> +        if not chosen_ref:
> +            # if none of the series have cover letters, simply use the last
> +            # one (hint: this relies on Python's weird scoping for for loops
> +            # where 'series_ref' is still accessible outside the loop)
> +            chosen_ref = series_ref
> +
> +        msgid_seriesrefs[msgid] = chosen_ref
> +
> +    # reassign any patches referring to non-authoritative series to point to
> +    # the authoritative one, and delete the other series; we do this separately
> +    # to allow us a chance to raise the exception above if necessary
> +    for msgid, chosen_ref in msgid_seriesrefs.items():
> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> +            if series_ref == chosen_ref:
> +                continue
> +
> +            # update the patches to point to our chosen series instead, on the
> +            # assumption that all other metadata is correct
> +            for patch in Patch.objects.filter(series=series_ref.series):
> +                patch.series = chosen_ref.series
> +                patch.save()
> +
> +            # delete the other series (which will delete the series ref)
> +            series_ref.series.delete()
> +
> +
> +def copy_project_field(apps, schema_editor):
> +    if connection.vendor == 'postgresql':
> +        schema_editor.execute(
> +            """
> +            UPDATE patchwork_seriesreference
> +              SET project_id = patchwork_series.project_id
> +            FROM patchwork_series
> +              WHERE patchwork_seriesreference.series_id = patchwork_series.id
> +        """
> +        )
> +    elif connection.vendor == 'mysql':
> +        schema_editor.execute(
> +            """
> +            UPDATE patchwork_seriesreference, patchwork_series
> +              SET patchwork_seriesreference.project_id = patchwork_series.project_id
> +            WHERE patchwork_seriesreference.series_id = patchwork_series.id
> +        """
> +        )  # noqa
> +    else:
> +        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> +
> +        for series_ref in SeriesReference.objects.all().select_related(
> +            'series'
> +        ):
> +            series_ref.project = series_ref.series.project
> +            series_ref.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [('patchwork', '0037_python_3')]
> +
> +    operations = [
> +        migrations.RunPython(
> +            merge_duplicate_series, migrations.RunPython.noop, atomic=False
> +        ),
> +        migrations.AddField(
> +            model_name='seriesreference',
> +            name='project',
> +            field=models.ForeignKey(
> +                null=True,
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to='patchwork.Project',
> +            ),
> +        ),
> +        migrations.AlterUniqueTogether(
> +            name='seriesreference', unique_together={('project', 'msgid')}
> +        ),
> +        migrations.RunPython(
> +            copy_project_field, migrations.RunPython.noop, atomic=False
> +        ),
> +        migrations.AlterField(
> +            model_name='seriesreference',
> +            name='project',
> +            field=models.ForeignKey(
> +                on_delete=django.db.models.deletion.CASCADE,
> +                to='patchwork.Project',
> +            ),
> +        ),
> +    ]
> diff --git patchwork/models.py patchwork/models.py
> index c198bc2c..e5bfecf8 100644
> --- patchwork/models.py
> +++ patchwork/models.py
> @@ -79,7 +79,8 @@ class Project(models.Model):
>      webscm_url = models.CharField(max_length=2000, blank=True)
>      list_archive_url = models.CharField(max_length=2000, blank=True)
>      list_archive_url_format = models.CharField(
> -        max_length=2000, blank=True,
> +        max_length=2000,
> +        blank=True,
>          help_text="URL format for the list archive's Message-ID redirector. "
>          "{} will be replaced by the Message-ID.")
>      commit_url_format = models.CharField(
> @@ -783,6 +784,7 @@ class SeriesReference(models.Model):
>      required to handle the case whereby one or more patches are
>      received before the cover letter.
>      """
> +    project = models.ForeignKey(Project, on_delete=models.CASCADE)
>      series = models.ForeignKey(Series, related_name='references',
>                                 related_query_name='reference',
>                                 on_delete=models.CASCADE)
> @@ -792,7 +794,7 @@ class SeriesReference(models.Model):
>          return self.msgid
>  
>      class Meta:
> -        unique_together = [('series', 'msgid')]
> +        unique_together = [('project', 'msgid')]
>  
>  
>  class Bundle(models.Model):
> diff --git patchwork/parser.py patchwork/parser.py
> index 7dc66bc0..4b837ae0 100644
> --- patchwork/parser.py
> +++ patchwork/parser.py
> @@ -16,6 +16,7 @@ import re
>  
>  from django.contrib.auth.models import User
>  from django.db.utils import IntegrityError
> +from django.db import transaction
>  from django.utils import six
>  
>  from patchwork.models import Comment
> @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail):
>      for ref in refs:
>          try:
>              return SeriesReference.objects.get(
> -                msgid=ref[:255], series__project=project).series
> +                msgid=ref[:255], project=project).series
>          except SeriesReference.DoesNotExist:
>              continue
> -        except SeriesReference.MultipleObjectsReturned:
> -            # FIXME: Open bug: this can happen when we're processing
> -            # messages in parallel. Pick the first and log.
> -            logger.error("Multiple SeriesReferences for %s in project %s!" %
> -                         (ref[:255], project.name))
> -            return SeriesReference.objects.filter(
> -                msgid=ref[:255], series__project=project).first().series
>  
>  
>  def _find_series_by_markers(project, mail, author):
> @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None):
>          except IntegrityError:
>              raise DuplicateMailError(msgid=msgid)
>  
> -        # if we don't have a series marker, we will never have an existing
> -        # series to match against.
> -        series = None
> -        if n:
> -            series = find_series(project, mail, author)
> +        for attempt in range(10):  # arbitrary retry count
> +            try:
> +                with transaction.atomic():
> +                    # if we don't have a series marker, we will never have an
> +                    # existing series to match against.
> +                    series = None
> +                    if n:
> +                        series = find_series(project, mail, author)
> +                    else:
> +                        x = n = 1
> +
> +                    # We will create a new series if:
> +                    # - there is no existing series to assign this patch to, or
> +                    # - there is an existing series, but it already has a patch
> +                    #   with this number in it
> +                    if not series or Patch.objects.filter(
> +                            series=series, number=x).count():
> +                        series = Series.objects.create(
> +                            project=project,
> +                            date=date,
> +                            submitter=author,
> +                            version=version,
> +                            total=n)
> +
> +                        # NOTE(stephenfin) We must save references for series.
> +                        # We do this to handle the case where a later patch is
> +                        # received first. Without storing references, it would
> +                        # not be possible to identify the relationship between
> +                        # patches as the earlier patch does not reference the
> +                        # later one.
> +                        for ref in refs + [msgid]:
> +                            ref = ref[:255]
> +                            # we don't want duplicates
> +                            try:
> +                                # we could have a ref to a previous series.
> +                                # (For example, a series sent in reply to
> +                                # another series.) That should not create a
> +                                # series ref for this series, so check for the
> +                                # msg-id only, not the msg-id/series pair.
> +                                SeriesReference.objects.get(
> +                                    msgid=ref, project=project)
> +                            except SeriesReference.DoesNotExist:
> +                                SeriesReference.objects.create(
> +                                    msgid=ref, project=project, series=series)
> +                    break
> +            except IntegrityError:
> +                # we lost the race so go again
> +                logger.info('Conflict while saving series. This is '
> +                            'probably because multiple patches belonging '
> +                            'to the same series have been received at '
> +                            'once. Trying again (attempt %02d/10)',
> +                            attempt)
>          else:
> -            x = n = 1
> -
> -        # We will create a new series if:
> -        # - there is no existing series to assign this patch to, or
> -        # - there is an existing series, but it already has a patch with this
> -        #   number in it
> -        if not series or Patch.objects.filter(series=series, number=x).count():
> -            series = Series.objects.create(
> -                project=project,
> -                date=date,
> -                submitter=author,
> -                version=version,
> -                total=n)
> -
> -            # NOTE(stephenfin) We must save references for series. We
> -            # do this to handle the case where a later patch is
> -            # received first. Without storing references, it would not
> -            # be possible to identify the relationship between patches
> -            # as the earlier patch does not reference the later one.
> -            for ref in refs + [msgid]:
> -                ref = ref[:255]
> -                # we don't want duplicates
> -                try:
> -                    # we could have a ref to a previous series. (For
> -                    # example, a series sent in reply to another
> -                    # series.) That should not create a series ref
> -                    # for this series, so check for the msg-id only,
> -                    # not the msg-id/series pair.
> -                    SeriesReference.objects.get(msgid=ref,
> -                                                series__project=project)
> -                except SeriesReference.DoesNotExist:
> -                    SeriesReference.objects.create(series=series, msgid=ref)
> -                except SeriesReference.MultipleObjectsReturned:
> -                    logger.error("Multiple SeriesReferences for %s"
> -                                 " in project %s!" % (ref, project.name))
> +            # we failed to save the series so return the series-less patch
> +            logger.warning('Failed to save series. Your patch has been '
> +                           'saved but this should not happen. Please '
> +                           'report this as a bug and include logs')
> +            return patch
>  
>          # add to a series if we have found one, and we have a numbered
>          # patch. Don't add unnumbered patches (for example diffs sent
> @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None):
>              # message
>              try:
>                  series = SeriesReference.objects.get(
> -                    msgid=msgid, series__project=project).series
> +                    msgid=msgid, project=project).series
>              except SeriesReference.DoesNotExist:
>                  series = None
> -            except SeriesReference.MultipleObjectsReturned:
> -                logger.error("Multiple SeriesReferences for %s"
> -                             " in project %s!" % (msgid, project.name))
> -                series = SeriesReference.objects.filter(
> -                    msgid=msgid, series__project=project).first().series
>  
>              if not series:
>                  series = Series.objects.create(
> @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None):
>                  # we don't save the in-reply-to or references fields
>                  # for a cover letter, as they can't refer to the same
>                  # series
> -                try:
> -                    SeriesReference.objects.get_or_create(series=series,
> -                                                          msgid=msgid)
> -                except SeriesReference.MultipleObjectsReturned:
> -                    logger.error("Multiple SeriesReferences for %s"
> -                                 " in project %s!" % (msgid, project.name))
> +                SeriesReference.objects.create(
> +                    msgid=msgid, project=project, series=series)
>  
>              try:
>                  cover_letter = CoverLetter.objects.create(
> diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
> index e5a2fca3..a69c64ba 100644
> --- patchwork/tests/test_parser.py
> +++ patchwork/tests/test_parser.py
> @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase):
>          msgids = [make_msgid()]
>          project = create_project()
>          series_v1 = create_series(project=project)
> -        create_series_reference(msgid=msgids[0], series=series_v1)
> +        create_series_reference(msgid=msgids[0], series=series_v1,
> +                                project=project)
>  
>          # ...and three patches
>          for i in range(3):
>              msgids.append(make_msgid())
> -            create_series_reference(msgid=msgids[-1], series=series_v1)
> +            create_series_reference(msgid=msgids[-1], series=series_v1,
> +                                    project=project)
>  
>          # now create a new series with "cover letter"
>          msgids.append(make_msgid())
>          series_v2 = create_series(project=project)
> -        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
> +        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
> +                                         project=project)
>  
>          # ...and the "first patch" of this new series
>          msgid = make_msgid()
> diff --git patchwork/tests/utils.py patchwork/tests/utils.py
> index 577183d0..4ead1781 100644
> --- patchwork/tests/utils.py
> +++ patchwork/tests/utils.py
> @@ -282,8 +282,12 @@ def create_series(**kwargs):
>  
>  def create_series_reference(**kwargs):
>      """Create 'SeriesReference' object."""
> +    project = kwargs.pop('project', create_project())
> +    series = kwargs.pop('series', create_series(project=project))
> +
>      values = {
> -        'series': create_series() if 'series' not in kwargs else None,
> +        'series': series,
> +        'project': project,
>          'msgid': make_msgid(),
>      }
>      values.update(**kwargs)
> -- 
> 2.21.0
Stephen Finucane Oct. 22, 2019, 3:36 a.m. UTC | #5
On Sat, 2019-10-19 at 00:57 +1100, Daniel Axtens wrote:
> > Closes: #241
> 
> I'm not sure this is quite correct, sadly. Using the parallel parser I
> posted, I see a difference in the number of series created by parsing in
> parallel and parsing serially.
> 
> # serial (-j1)
> > > > Series.objects.count()
> 28016
> 
> # parallel (-j6)
> > > > Series.objects.count()
> 28065
> 
> 
> I investigated just the first Untitled series from the parallel case,
> where I see that one 6-patch series has been split up a lot:
> 
> > > > pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=42)[0:6]])
> [('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
>   'CPUs',
>   <Series: [1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD CPUs>),
>  ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
>   <Series: Untitled series #307>),
>  ('[3/6] x86: move nop declarations into separate include file',
>   <Series: Untitled series #544>),
>  ('[4/6] x86: introduce rdtsc_barrier()', <Series: Untitled series #307>),
>  ('[5/6] x86: remove get_cycles_sync', <Series: Untitled series #559>),
>  ('[6/6] x86: read_tsc sync', <Series: Untitled series #449>)]
> 
> If I do serial parsing, this does not get split up:
> 
> > > > pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=74)[0:6]])
> [('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
>   'CPUs',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[3/6] x86: move nop declarations into separate include file',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[4/6] x86: introduce rdtsc_barrier()',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[5/6] x86: remove get_cycles_sync',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[6/6] x86: read_tsc sync', <Series: Get rid of CPUID from gettimeofday>)]
> 
> I haven't looked at the series in any detail beyond this - it might
> still be worth including, but it just doesn't quite squash the
> bug, sadly.

So my only theory about why this might be happening is that this series
is shallow threaded (so all patches contain a reference to the cover
letter but not their preceding patches) and there is still a race in
creating the SeriesReference corresponding to the cover letter which
some patches are losing. However, what's confusing is that they should
be trying and hitting an IntegrityError in the losing case, resulting
in the rollback/retry of the entire transaction. Clearly that is not
the case though so it would be good to figure out what's missing.

Would you be able to dump the rows for these Patches (minus the diff,
headers and content fields), the Series, and the associated
SeriesReferences so I can try figure out how we ended up doing this? I
probably won't be able to work on it for the next two weeks (I'm on
PTO) but I'll try crack it then. This has to be possible.

Stephen

> Regards,
> Daniel
> 
> > Cc: Daniel Axtens <dja@axtens.net>
> > Cc: Petr Vorel <petr.vorel@gmail.com>
> > ---
> >  patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
> >  .../0038_unique_series_references.py          | 121 +++++++
> >  patchwork/models.py                           |   6 +-
> >  patchwork/parser.py                           | 122 +++----
> >  patchwork/tests/test_parser.py                |   9 +-
> >  patchwork/tests/utils.py                      |   6 +-
> >  6 files changed, 496 insertions(+), 66 deletions(-)
> >  create mode 100644 patchwork/migrations/0037_python_3.py
> >  create mode 100644 patchwork/migrations/0038_unique_series_references.py
> > 
> > diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
> > new file mode 100644
> > index 00000000..416a7045
> > --- /dev/null
> > +++ patchwork/migrations/0037_python_3.py
> > @@ -0,0 +1,298 @@
> > +import datetime
> > +
> > +from django.conf import settings
> > +from django.db import migrations, models
> > +import django.db.models.deletion
> > +
> > +import patchwork.models
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [('patchwork', '0036_project_commit_url_format')]
> > +
> > +    operations = [
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='context',
> > +            field=models.SlugField(
> > +                default='default',
> > +                help_text='A label to discern check from checks of other testing systems.',  # noqa
> > +                max_length=255,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='description',
> > +            field=models.TextField(
> > +                blank=True,
> > +                help_text='A brief description of the check.',
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='state',
> > +            field=models.SmallIntegerField(
> > +                choices=[
> > +                    (0, 'pending'),
> > +                    (1, 'success'),
> > +                    (2, 'warning'),
> > +                    (3, 'fail'),
> > +                ],
> > +                default=0,
> > +                help_text='The state of the check.',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='check',
> > +            name='target_url',
> > +            field=models.URLField(
> > +                blank=True,
> > +                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='comment',
> > +            name='submission',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='comments',
> > +                related_query_name='comment',
> > +                to='patchwork.Submission',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='path',
> > +            field=models.CharField(
> > +                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
> > +                max_length=255,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='priority',
> > +            field=models.IntegerField(
> > +                default=0,
> > +                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='delegationrule',
> > +            name='user',
> > +            field=models.ForeignKey(
> > +                help_text='A user to delegate the patch to.',
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to=settings.AUTH_USER_MODEL,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='emailconfirmation',
> > +            name='type',
> > +            field=models.CharField(
> > +                choices=[
> > +                    ('userperson', 'User-Person association'),
> > +                    ('registration', 'Registration'),
> > +                    ('optout', 'Email opt-out'),
> > +                ],
> > +                max_length=20,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='category',
> > +            field=models.CharField(
> > +                choices=[
> > +                    ('cover-created', 'Cover Letter Created'),
> > +                    ('patch-created', 'Patch Created'),
> > +                    ('patch-completed', 'Patch Completed'),
> > +                    ('patch-state-changed', 'Patch State Changed'),
> > +                    ('patch-delegated', 'Patch Delegate Changed'),
> > +                    ('check-created', 'Check Created'),
> > +                    ('series-created', 'Series Created'),
> > +                    ('series-completed', 'Series Completed'),
> > +                ],
> > +                db_index=True,
> > +                help_text='The category of the event.',
> > +                max_length=20,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='cover',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The cover letter that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.CoverLetter',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='date',
> > +            field=models.DateTimeField(
> > +                default=datetime.datetime.utcnow,
> > +                help_text='The time this event was created.',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='patch',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The patch that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Patch',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                help_text='The project that the events belongs to.',
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='event',
> > +            name='series',
> > +            field=models.ForeignKey(
> > +                blank=True,
> > +                help_text='The series that this event was created for.',
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='+',
> > +                to='patchwork.Series',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='patch',
> > +            name='number',
> > +            field=models.PositiveSmallIntegerField(
> > +                default=None,
> > +                help_text='The number assigned to this patch in the series',
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='commit_url_format',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
> > +                max_length=2000,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='list_archive_url_format',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
> > +                max_length=2000,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='project',
> > +            name='subject_match',
> > +            field=models.CharField(
> > +                blank=True,
> > +                default='',
> > +                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
> > +                max_length=64,
> > +                validators=[patchwork.models.validate_regex_compiles],
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='name',
> > +            field=models.CharField(
> > +                blank=True,
> > +                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
> > +                max_length=255,
> > +                null=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='total',
> > +            field=models.IntegerField(
> > +                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='series',
> > +            name='version',
> > +            field=models.IntegerField(
> > +                default=1,
> > +                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='seriesreference',
> > +            name='series',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                related_name='references',
> > +                related_query_name='reference',
> > +                to='patchwork.Series',
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='abbrev',
> > +            field=models.CharField(
> > +                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
> > +                max_length=2,
> > +                unique=True,
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='pattern',
> > +            field=models.CharField(
> > +                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
> > +                max_length=50,
> > +                validators=[patchwork.models.validate_regex_compiles],
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='tag',
> > +            name='show_column',
> > +            field=models.BooleanField(
> > +                default=True,
> > +                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='items_per_page',
> > +            field=models.PositiveIntegerField(
> > +                default=100, help_text='Number of items to display per page'
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='send_email',
> > +            field=models.BooleanField(
> > +                default=False,
> > +                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
> > +            ),
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='userprofile',
> > +            name='show_ids',
> > +            field=models.BooleanField(
> > +                default=False,
> > +                help_text='Show click-to-copy patch IDs in the list view',
> > +            ),
> > +        ),
> > +    ]
> > diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
> > new file mode 100644
> > index 00000000..c5517ada
> > --- /dev/null
> > +++ patchwork/migrations/0038_unique_series_references.py
> > @@ -0,0 +1,121 @@
> > +from django.db import connection, migrations, models
> > +from django.db.models import Count
> > +import django.db.models.deletion
> > +
> > +
> > +def merge_duplicate_series(apps, schema_editor):
> > +    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> > +    Patch = apps.get_model('patchwork', 'Patch')
> > +
> > +    msgid_seriesrefs = {}
> > +
> > +    # find all SeriesReference that share a msgid but point to different series
> > +    # and decide which of the series is going to be the authoritative one
> > +    msgid_counts = (
> > +        SeriesReference.objects.values('msgid')
> > +        .annotate(count=Count('msgid'))
> > +        .filter(count__gt=1)
> > +    )
> > +    for msgid_count in msgid_counts:
> > +        msgid = msgid_count['msgid']
> > +        chosen_ref = None
> > +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> > +            if series_ref.series.cover_letter:
> > +                if chosen_ref:
> > +                    # I don't think this can happen, but explode if it does
> > +                    raise Exception(
> > +                        "Looks like you've got two or more series that share "
> > +                        "some patches but do not share a cover letter. Unable "
> > +                        "to auto-resolve."
> > +                    )
> > +
> > +                # if a series has a cover letter, that's the one we'll group
> > +                # everything under
> > +                chosen_ref = series_ref
> > +
> > +        if not chosen_ref:
> > +            # if none of the series have cover letters, simply use the last
> > +            # one (hint: this relies on Python's weird scoping for for loops
> > +            # where 'series_ref' is still accessible outside the loop)
> > +            chosen_ref = series_ref
> > +
> > +        msgid_seriesrefs[msgid] = chosen_ref
> > +
> > +    # reassign any patches referring to non-authoritative series to point to
> > +    # the authoritative one, and delete the other series; we do this separately
> > +    # to allow us a chance to raise the exception above if necessary
> > +    for msgid, chosen_ref in msgid_seriesrefs.items():
> > +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
> > +            if series_ref == chosen_ref:
> > +                continue
> > +
> > +            # update the patches to point to our chosen series instead, on the
> > +            # assumption that all other metadata is correct
> > +            for patch in Patch.objects.filter(series=series_ref.series):
> > +                patch.series = chosen_ref.series
> > +                patch.save()
> > +
> > +            # delete the other series (which will delete the series ref)
> > +            series_ref.series.delete()
> > +
> > +
> > +def copy_project_field(apps, schema_editor):
> > +    if connection.vendor == 'postgresql':
> > +        schema_editor.execute(
> > +            """
> > +            UPDATE patchwork_seriesreference
> > +              SET project_id = patchwork_series.project_id
> > +            FROM patchwork_series
> > +              WHERE patchwork_seriesreference.series_id = patchwork_series.id
> > +        """
> > +        )
> > +    elif connection.vendor == 'mysql':
> > +        schema_editor.execute(
> > +            """
> > +            UPDATE patchwork_seriesreference, patchwork_series
> > +              SET patchwork_seriesreference.project_id = patchwork_series.project_id
> > +            WHERE patchwork_seriesreference.series_id = patchwork_series.id
> > +        """
> > +        )  # noqa
> > +    else:
> > +        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> > +
> > +        for series_ref in SeriesReference.objects.all().select_related(
> > +            'series'
> > +        ):
> > +            series_ref.project = series_ref.series.project
> > +            series_ref.save()
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [('patchwork', '0037_python_3')]
> > +
> > +    operations = [
> > +        migrations.RunPython(
> > +            merge_duplicate_series, migrations.RunPython.noop, atomic=False
> > +        ),
> > +        migrations.AddField(
> > +            model_name='seriesreference',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                null=True,
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +        migrations.AlterUniqueTogether(
> > +            name='seriesreference', unique_together={('project', 'msgid')}
> > +        ),
> > +        migrations.RunPython(
> > +            copy_project_field, migrations.RunPython.noop, atomic=False
> > +        ),
> > +        migrations.AlterField(
> > +            model_name='seriesreference',
> > +            name='project',
> > +            field=models.ForeignKey(
> > +                on_delete=django.db.models.deletion.CASCADE,
> > +                to='patchwork.Project',
> > +            ),
> > +        ),
> > +    ]
> > diff --git patchwork/models.py patchwork/models.py
> > index c198bc2c..e5bfecf8 100644
> > --- patchwork/models.py
> > +++ patchwork/models.py
> > @@ -79,7 +79,8 @@ class Project(models.Model):
> >      webscm_url = models.CharField(max_length=2000, blank=True)
> >      list_archive_url = models.CharField(max_length=2000, blank=True)
> >      list_archive_url_format = models.CharField(
> > -        max_length=2000, blank=True,
> > +        max_length=2000,
> > +        blank=True,
> >          help_text="URL format for the list archive's Message-ID redirector. "
> >          "{} will be replaced by the Message-ID.")
> >      commit_url_format = models.CharField(
> > @@ -783,6 +784,7 @@ class SeriesReference(models.Model):
> >      required to handle the case whereby one or more patches are
> >      received before the cover letter.
> >      """
> > +    project = models.ForeignKey(Project, on_delete=models.CASCADE)
> >      series = models.ForeignKey(Series, related_name='references',
> >                                 related_query_name='reference',
> >                                 on_delete=models.CASCADE)
> > @@ -792,7 +794,7 @@ class SeriesReference(models.Model):
> >          return self.msgid
> >  
> >      class Meta:
> > -        unique_together = [('series', 'msgid')]
> > +        unique_together = [('project', 'msgid')]
> >  
> >  
> >  class Bundle(models.Model):
> > diff --git patchwork/parser.py patchwork/parser.py
> > index 7dc66bc0..4b837ae0 100644
> > --- patchwork/parser.py
> > +++ patchwork/parser.py
> > @@ -16,6 +16,7 @@ import re
> >  
> >  from django.contrib.auth.models import User
> >  from django.db.utils import IntegrityError
> > +from django.db import transaction
> >  from django.utils import six
> >  
> >  from patchwork.models import Comment
> > @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail):
> >      for ref in refs:
> >          try:
> >              return SeriesReference.objects.get(
> > -                msgid=ref[:255], series__project=project).series
> > +                msgid=ref[:255], project=project).series
> >          except SeriesReference.DoesNotExist:
> >              continue
> > -        except SeriesReference.MultipleObjectsReturned:
> > -            # FIXME: Open bug: this can happen when we're processing
> > -            # messages in parallel. Pick the first and log.
> > -            logger.error("Multiple SeriesReferences for %s in project %s!" %
> > -                         (ref[:255], project.name))
> > -            return SeriesReference.objects.filter(
> > -                msgid=ref[:255], series__project=project).first().series
> >  
> >  
> >  def _find_series_by_markers(project, mail, author):
> > @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None):
> >          except IntegrityError:
> >              raise DuplicateMailError(msgid=msgid)
> >  
> > -        # if we don't have a series marker, we will never have an existing
> > -        # series to match against.
> > -        series = None
> > -        if n:
> > -            series = find_series(project, mail, author)
> > +        for attempt in range(10):  # arbitrary retry count
> > +            try:
> > +                with transaction.atomic():
> > +                    # if we don't have a series marker, we will never have an
> > +                    # existing series to match against.
> > +                    series = None
> > +                    if n:
> > +                        series = find_series(project, mail, author)
> > +                    else:
> > +                        x = n = 1
> > +
> > +                    # We will create a new series if:
> > +                    # - there is no existing series to assign this patch to, or
> > +                    # - there is an existing series, but it already has a patch
> > +                    #   with this number in it
> > +                    if not series or Patch.objects.filter(
> > +                            series=series, number=x).count():
> > +                        series = Series.objects.create(
> > +                            project=project,
> > +                            date=date,
> > +                            submitter=author,
> > +                            version=version,
> > +                            total=n)
> > +
> > +                        # NOTE(stephenfin) We must save references for series.
> > +                        # We do this to handle the case where a later patch is
> > +                        # received first. Without storing references, it would
> > +                        # not be possible to identify the relationship between
> > +                        # patches as the earlier patch does not reference the
> > +                        # later one.
> > +                        for ref in refs + [msgid]:
> > +                            ref = ref[:255]
> > +                            # we don't want duplicates
> > +                            try:
> > +                                # we could have a ref to a previous series.
> > +                                # (For example, a series sent in reply to
> > +                                # another series.) That should not create a
> > +                                # series ref for this series, so check for the
> > +                                # msg-id only, not the msg-id/series pair.
> > +                                SeriesReference.objects.get(
> > +                                    msgid=ref, project=project)
> > +                            except SeriesReference.DoesNotExist:
> > +                                SeriesReference.objects.create(
> > +                                    msgid=ref, project=project, series=series)
> > +                    break
> > +            except IntegrityError:
> > +                # we lost the race so go again
> > +                logger.info('Conflict while saving series. This is '
> > +                            'probably because multiple patches belonging '
> > +                            'to the same series have been received at '
> > +                            'once. Trying again (attempt %02d/10)',
> > +                            attempt)
> >          else:
> > -            x = n = 1
> > -
> > -        # We will create a new series if:
> > -        # - there is no existing series to assign this patch to, or
> > -        # - there is an existing series, but it already has a patch with this
> > -        #   number in it
> > -        if not series or Patch.objects.filter(series=series, number=x).count():
> > -            series = Series.objects.create(
> > -                project=project,
> > -                date=date,
> > -                submitter=author,
> > -                version=version,
> > -                total=n)
> > -
> > -            # NOTE(stephenfin) We must save references for series. We
> > -            # do this to handle the case where a later patch is
> > -            # received first. Without storing references, it would not
> > -            # be possible to identify the relationship between patches
> > -            # as the earlier patch does not reference the later one.
> > -            for ref in refs + [msgid]:
> > -                ref = ref[:255]
> > -                # we don't want duplicates
> > -                try:
> > -                    # we could have a ref to a previous series. (For
> > -                    # example, a series sent in reply to another
> > -                    # series.) That should not create a series ref
> > -                    # for this series, so check for the msg-id only,
> > -                    # not the msg-id/series pair.
> > -                    SeriesReference.objects.get(msgid=ref,
> > -                                                series__project=project)
> > -                except SeriesReference.DoesNotExist:
> > -                    SeriesReference.objects.create(series=series, msgid=ref)
> > -                except SeriesReference.MultipleObjectsReturned:
> > -                    logger.error("Multiple SeriesReferences for %s"
> > -                                 " in project %s!" % (ref, project.name))
> > +            # we failed to save the series so return the series-less patch
> > +            logger.warning('Failed to save series. Your patch has been '
> > +                           'saved but this should not happen. Please '
> > +                           'report this as a bug and include logs')
> > +            return patch
> >  
> >          # add to a series if we have found one, and we have a numbered
> >          # patch. Don't add unnumbered patches (for example diffs sent
> > @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None):
> >              # message
> >              try:
> >                  series = SeriesReference.objects.get(
> > -                    msgid=msgid, series__project=project).series
> > +                    msgid=msgid, project=project).series
> >              except SeriesReference.DoesNotExist:
> >                  series = None
> > -            except SeriesReference.MultipleObjectsReturned:
> > -                logger.error("Multiple SeriesReferences for %s"
> > -                             " in project %s!" % (msgid, project.name))
> > -                series = SeriesReference.objects.filter(
> > -                    msgid=msgid, series__project=project).first().series
> >  
> >              if not series:
> >                  series = Series.objects.create(
> > @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None):
> >                  # we don't save the in-reply-to or references fields
> >                  # for a cover letter, as they can't refer to the same
> >                  # series
> > -                try:
> > -                    SeriesReference.objects.get_or_create(series=series,
> > -                                                          msgid=msgid)
> > -                except SeriesReference.MultipleObjectsReturned:
> > -                    logger.error("Multiple SeriesReferences for %s"
> > -                                 " in project %s!" % (msgid, project.name))
> > +                SeriesReference.objects.create(
> > +                    msgid=msgid, project=project, series=series)
> >  
> >              try:
> >                  cover_letter = CoverLetter.objects.create(
> > diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
> > index e5a2fca3..a69c64ba 100644
> > --- patchwork/tests/test_parser.py
> > +++ patchwork/tests/test_parser.py
> > @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase):
> >          msgids = [make_msgid()]
> >          project = create_project()
> >          series_v1 = create_series(project=project)
> > -        create_series_reference(msgid=msgids[0], series=series_v1)
> > +        create_series_reference(msgid=msgids[0], series=series_v1,
> > +                                project=project)
> >  
> >          # ...and three patches
> >          for i in range(3):
> >              msgids.append(make_msgid())
> > -            create_series_reference(msgid=msgids[-1], series=series_v1)
> > +            create_series_reference(msgid=msgids[-1], series=series_v1,
> > +                                    project=project)
> >  
> >          # now create a new series with "cover letter"
> >          msgids.append(make_msgid())
> >          series_v2 = create_series(project=project)
> > -        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
> > +        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
> > +                                         project=project)
> >  
> >          # ...and the "first patch" of this new series
> >          msgid = make_msgid()
> > diff --git patchwork/tests/utils.py patchwork/tests/utils.py
> > index 577183d0..4ead1781 100644
> > --- patchwork/tests/utils.py
> > +++ patchwork/tests/utils.py
> > @@ -282,8 +282,12 @@ def create_series(**kwargs):
> >  
> >  def create_series_reference(**kwargs):
> >      """Create 'SeriesReference' object."""
> > +    project = kwargs.pop('project', create_project())
> > +    series = kwargs.pop('series', create_series(project=project))
> > +
> >      values = {
> > -        'series': create_series() if 'series' not in kwargs else None,
> > +        'series': series,
> > +        'project': project,
> >          'msgid': make_msgid(),
> >      }
> >      values.update(**kwargs)
> > -- 
> > 2.21.0
Daniel Axtens Oct. 29, 2019, 4 p.m. UTC | #6
Daniel Axtens <dja@axtens.net> writes:

>> Closes: #241
>
> I'm not sure this is quite correct, sadly. Using the parallel parser I
> posted, I see a difference in the number of series created by parsing in
> parallel and parsing serially.
>
> # serial (-j1)
>>>> Series.objects.count()
> 28016
>
> # parallel (-j6)
>>>> Series.objects.count()
> 28065
>
>
> I investigated just the first Untitled series from the parallel case,
> where I see that one 6-patch series has been split up a lot:
>
>>>> pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=42)[0:6]])
> [('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
>   'CPUs',
>   <Series: [1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD CPUs>),
>  ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
>   <Series: Untitled series #307>),
>  ('[3/6] x86: move nop declarations into separate include file',
>   <Series: Untitled series #544>),
>  ('[4/6] x86: introduce rdtsc_barrier()', <Series: Untitled series #307>),
>  ('[5/6] x86: remove get_cycles_sync', <Series: Untitled series #559>),
>  ('[6/6] x86: read_tsc sync', <Series: Untitled series #449>)]
>
> If I do serial parsing, this does not get split up:
>
>>>> pprint.pprint([(p.name, p.series) for p in Patch.objects.filter(submitter__id=74)[0:6]])
> [('[1/6] x86: implement support to synchronize RDTSC through MFENCE on AMD '
>   'CPUs',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[2/6] x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[3/6] x86: move nop declarations into separate include file',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[4/6] x86: introduce rdtsc_barrier()',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[5/6] x86: remove get_cycles_sync',
>   <Series: Get rid of CPUID from gettimeofday>),
>  ('[6/6] x86: read_tsc sync', <Series: Get rid of CPUID from gettimeofday>)]
>
> I haven't looked at the series in any detail beyond this - it might
> still be worth including, but it just doesn't quite squash the
> bug, sadly.

I've just implemented an alternative approach (put a field in Series
that represents the 'head' message-id), and also saw this split up - it
turns out that it doesn't contain References or In-Reply-To. I'll have a
look to see if any are split that do contain those headers.

Regards,
Daniel

>
> Regards,
> Daniel
>
>> Cc: Daniel Axtens <dja@axtens.net>
>> Cc: Petr Vorel <petr.vorel@gmail.com>
>> ---
>>  patchwork/migrations/0037_python_3.py         | 298 ++++++++++++++++++
>>  .../0038_unique_series_references.py          | 121 +++++++
>>  patchwork/models.py                           |   6 +-
>>  patchwork/parser.py                           | 122 +++----
>>  patchwork/tests/test_parser.py                |   9 +-
>>  patchwork/tests/utils.py                      |   6 +-
>>  6 files changed, 496 insertions(+), 66 deletions(-)
>>  create mode 100644 patchwork/migrations/0037_python_3.py
>>  create mode 100644 patchwork/migrations/0038_unique_series_references.py
>>
>> diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
>> new file mode 100644
>> index 00000000..416a7045
>> --- /dev/null
>> +++ patchwork/migrations/0037_python_3.py
>> @@ -0,0 +1,298 @@
>> +import datetime
>> +
>> +from django.conf import settings
>> +from django.db import migrations, models
>> +import django.db.models.deletion
>> +
>> +import patchwork.models
>> +
>> +
>> +class Migration(migrations.Migration):
>> +
>> +    dependencies = [('patchwork', '0036_project_commit_url_format')]
>> +
>> +    operations = [
>> +        migrations.AlterField(
>> +            model_name='check',
>> +            name='context',
>> +            field=models.SlugField(
>> +                default='default',
>> +                help_text='A label to discern check from checks of other testing systems.',  # noqa
>> +                max_length=255,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='check',
>> +            name='description',
>> +            field=models.TextField(
>> +                blank=True,
>> +                help_text='A brief description of the check.',
>> +                null=True,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='check',
>> +            name='state',
>> +            field=models.SmallIntegerField(
>> +                choices=[
>> +                    (0, 'pending'),
>> +                    (1, 'success'),
>> +                    (2, 'warning'),
>> +                    (3, 'fail'),
>> +                ],
>> +                default=0,
>> +                help_text='The state of the check.',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='check',
>> +            name='target_url',
>> +            field=models.URLField(
>> +                blank=True,
>> +                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
>> +                null=True,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='comment',
>> +            name='submission',
>> +            field=models.ForeignKey(
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='comments',
>> +                related_query_name='comment',
>> +                to='patchwork.Submission',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='delegationrule',
>> +            name='path',
>> +            field=models.CharField(
>> +                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
>> +                max_length=255,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='delegationrule',
>> +            name='priority',
>> +            field=models.IntegerField(
>> +                default=0,
>> +                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='delegationrule',
>> +            name='user',
>> +            field=models.ForeignKey(
>> +                help_text='A user to delegate the patch to.',
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                to=settings.AUTH_USER_MODEL,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='emailconfirmation',
>> +            name='type',
>> +            field=models.CharField(
>> +                choices=[
>> +                    ('userperson', 'User-Person association'),
>> +                    ('registration', 'Registration'),
>> +                    ('optout', 'Email opt-out'),
>> +                ],
>> +                max_length=20,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='category',
>> +            field=models.CharField(
>> +                choices=[
>> +                    ('cover-created', 'Cover Letter Created'),
>> +                    ('patch-created', 'Patch Created'),
>> +                    ('patch-completed', 'Patch Completed'),
>> +                    ('patch-state-changed', 'Patch State Changed'),
>> +                    ('patch-delegated', 'Patch Delegate Changed'),
>> +                    ('check-created', 'Check Created'),
>> +                    ('series-created', 'Series Created'),
>> +                    ('series-completed', 'Series Completed'),
>> +                ],
>> +                db_index=True,
>> +                help_text='The category of the event.',
>> +                max_length=20,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='cover',
>> +            field=models.ForeignKey(
>> +                blank=True,
>> +                help_text='The cover letter that this event was created for.',
>> +                null=True,
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='+',
>> +                to='patchwork.CoverLetter',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='date',
>> +            field=models.DateTimeField(
>> +                default=datetime.datetime.utcnow,
>> +                help_text='The time this event was created.',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='patch',
>> +            field=models.ForeignKey(
>> +                blank=True,
>> +                help_text='The patch that this event was created for.',
>> +                null=True,
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='+',
>> +                to='patchwork.Patch',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='project',
>> +            field=models.ForeignKey(
>> +                help_text='The project that the events belongs to.',
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='+',
>> +                to='patchwork.Project',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='event',
>> +            name='series',
>> +            field=models.ForeignKey(
>> +                blank=True,
>> +                help_text='The series that this event was created for.',
>> +                null=True,
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='+',
>> +                to='patchwork.Series',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='patch',
>> +            name='number',
>> +            field=models.PositiveSmallIntegerField(
>> +                default=None,
>> +                help_text='The number assigned to this patch in the series',
>> +                null=True,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='project',
>> +            name='commit_url_format',
>> +            field=models.CharField(
>> +                blank=True,
>> +                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
>> +                max_length=2000,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='project',
>> +            name='list_archive_url_format',
>> +            field=models.CharField(
>> +                blank=True,
>> +                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
>> +                max_length=2000,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='project',
>> +            name='subject_match',
>> +            field=models.CharField(
>> +                blank=True,
>> +                default='',
>> +                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
>> +                max_length=64,
>> +                validators=[patchwork.models.validate_regex_compiles],
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='series',
>> +            name='name',
>> +            field=models.CharField(
>> +                blank=True,
>> +                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
>> +                max_length=255,
>> +                null=True,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='series',
>> +            name='total',
>> +            field=models.IntegerField(
>> +                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='series',
>> +            name='version',
>> +            field=models.IntegerField(
>> +                default=1,
>> +                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='seriesreference',
>> +            name='series',
>> +            field=models.ForeignKey(
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                related_name='references',
>> +                related_query_name='reference',
>> +                to='patchwork.Series',
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='tag',
>> +            name='abbrev',
>> +            field=models.CharField(
>> +                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
>> +                max_length=2,
>> +                unique=True,
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='tag',
>> +            name='pattern',
>> +            field=models.CharField(
>> +                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
>> +                max_length=50,
>> +                validators=[patchwork.models.validate_regex_compiles],
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='tag',
>> +            name='show_column',
>> +            field=models.BooleanField(
>> +                default=True,
>> +                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='userprofile',
>> +            name='items_per_page',
>> +            field=models.PositiveIntegerField(
>> +                default=100, help_text='Number of items to display per page'
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='userprofile',
>> +            name='send_email',
>> +            field=models.BooleanField(
>> +                default=False,
>> +                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
>> +            ),
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='userprofile',
>> +            name='show_ids',
>> +            field=models.BooleanField(
>> +                default=False,
>> +                help_text='Show click-to-copy patch IDs in the list view',
>> +            ),
>> +        ),
>> +    ]
>> diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
>> new file mode 100644
>> index 00000000..c5517ada
>> --- /dev/null
>> +++ patchwork/migrations/0038_unique_series_references.py
>> @@ -0,0 +1,121 @@
>> +from django.db import connection, migrations, models
>> +from django.db.models import Count
>> +import django.db.models.deletion
>> +
>> +
>> +def merge_duplicate_series(apps, schema_editor):
>> +    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
>> +    Patch = apps.get_model('patchwork', 'Patch')
>> +
>> +    msgid_seriesrefs = {}
>> +
>> +    # find all SeriesReference that share a msgid but point to different series
>> +    # and decide which of the series is going to be the authoritative one
>> +    msgid_counts = (
>> +        SeriesReference.objects.values('msgid')
>> +        .annotate(count=Count('msgid'))
>> +        .filter(count__gt=1)
>> +    )
>> +    for msgid_count in msgid_counts:
>> +        msgid = msgid_count['msgid']
>> +        chosen_ref = None
>> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
>> +            if series_ref.series.cover_letter:
>> +                if chosen_ref:
>> +                    # I don't think this can happen, but explode if it does
>> +                    raise Exception(
>> +                        "Looks like you've got two or more series that share "
>> +                        "some patches but do not share a cover letter. Unable "
>> +                        "to auto-resolve."
>> +                    )
>> +
>> +                # if a series has a cover letter, that's the one we'll group
>> +                # everything under
>> +                chosen_ref = series_ref
>> +
>> +        if not chosen_ref:
>> +            # if none of the series have cover letters, simply use the last
>> +            # one (hint: this relies on Python's weird scoping for for loops
>> +            # where 'series_ref' is still accessible outside the loop)
>> +            chosen_ref = series_ref
>> +
>> +        msgid_seriesrefs[msgid] = chosen_ref
>> +
>> +    # reassign any patches referring to non-authoritative series to point to
>> +    # the authoritative one, and delete the other series; we do this separately
>> +    # to allow us a chance to raise the exception above if necessary
>> +    for msgid, chosen_ref in msgid_seriesrefs.items():
>> +        for series_ref in SeriesReference.objects.filter(msgid=msgid):
>> +            if series_ref == chosen_ref:
>> +                continue
>> +
>> +            # update the patches to point to our chosen series instead, on the
>> +            # assumption that all other metadata is correct
>> +            for patch in Patch.objects.filter(series=series_ref.series):
>> +                patch.series = chosen_ref.series
>> +                patch.save()
>> +
>> +            # delete the other series (which will delete the series ref)
>> +            series_ref.series.delete()
>> +
>> +
>> +def copy_project_field(apps, schema_editor):
>> +    if connection.vendor == 'postgresql':
>> +        schema_editor.execute(
>> +            """
>> +            UPDATE patchwork_seriesreference
>> +              SET project_id = patchwork_series.project_id
>> +            FROM patchwork_series
>> +              WHERE patchwork_seriesreference.series_id = patchwork_series.id
>> +        """
>> +        )
>> +    elif connection.vendor == 'mysql':
>> +        schema_editor.execute(
>> +            """
>> +            UPDATE patchwork_seriesreference, patchwork_series
>> +              SET patchwork_seriesreference.project_id = patchwork_series.project_id
>> +            WHERE patchwork_seriesreference.series_id = patchwork_series.id
>> +        """
>> +        )  # noqa
>> +    else:
>> +        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
>> +
>> +        for series_ref in SeriesReference.objects.all().select_related(
>> +            'series'
>> +        ):
>> +            series_ref.project = series_ref.series.project
>> +            series_ref.save()
>> +
>> +
>> +class Migration(migrations.Migration):
>> +
>> +    dependencies = [('patchwork', '0037_python_3')]
>> +
>> +    operations = [
>> +        migrations.RunPython(
>> +            merge_duplicate_series, migrations.RunPython.noop, atomic=False
>> +        ),
>> +        migrations.AddField(
>> +            model_name='seriesreference',
>> +            name='project',
>> +            field=models.ForeignKey(
>> +                null=True,
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                to='patchwork.Project',
>> +            ),
>> +        ),
>> +        migrations.AlterUniqueTogether(
>> +            name='seriesreference', unique_together={('project', 'msgid')}
>> +        ),
>> +        migrations.RunPython(
>> +            copy_project_field, migrations.RunPython.noop, atomic=False
>> +        ),
>> +        migrations.AlterField(
>> +            model_name='seriesreference',
>> +            name='project',
>> +            field=models.ForeignKey(
>> +                on_delete=django.db.models.deletion.CASCADE,
>> +                to='patchwork.Project',
>> +            ),
>> +        ),
>> +    ]
>> diff --git patchwork/models.py patchwork/models.py
>> index c198bc2c..e5bfecf8 100644
>> --- patchwork/models.py
>> +++ patchwork/models.py
>> @@ -79,7 +79,8 @@ class Project(models.Model):
>>      webscm_url = models.CharField(max_length=2000, blank=True)
>>      list_archive_url = models.CharField(max_length=2000, blank=True)
>>      list_archive_url_format = models.CharField(
>> -        max_length=2000, blank=True,
>> +        max_length=2000,
>> +        blank=True,
>>          help_text="URL format for the list archive's Message-ID redirector. "
>>          "{} will be replaced by the Message-ID.")
>>      commit_url_format = models.CharField(
>> @@ -783,6 +784,7 @@ class SeriesReference(models.Model):
>>      required to handle the case whereby one or more patches are
>>      received before the cover letter.
>>      """
>> +    project = models.ForeignKey(Project, on_delete=models.CASCADE)
>>      series = models.ForeignKey(Series, related_name='references',
>>                                 related_query_name='reference',
>>                                 on_delete=models.CASCADE)
>> @@ -792,7 +794,7 @@ class SeriesReference(models.Model):
>>          return self.msgid
>>  
>>      class Meta:
>> -        unique_together = [('series', 'msgid')]
>> +        unique_together = [('project', 'msgid')]
>>  
>>  
>>  class Bundle(models.Model):
>> diff --git patchwork/parser.py patchwork/parser.py
>> index 7dc66bc0..4b837ae0 100644
>> --- patchwork/parser.py
>> +++ patchwork/parser.py
>> @@ -16,6 +16,7 @@ import re
>>  
>>  from django.contrib.auth.models import User
>>  from django.db.utils import IntegrityError
>> +from django.db import transaction
>>  from django.utils import six
>>  
>>  from patchwork.models import Comment
>> @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail):
>>      for ref in refs:
>>          try:
>>              return SeriesReference.objects.get(
>> -                msgid=ref[:255], series__project=project).series
>> +                msgid=ref[:255], project=project).series
>>          except SeriesReference.DoesNotExist:
>>              continue
>> -        except SeriesReference.MultipleObjectsReturned:
>> -            # FIXME: Open bug: this can happen when we're processing
>> -            # messages in parallel. Pick the first and log.
>> -            logger.error("Multiple SeriesReferences for %s in project %s!" %
>> -                         (ref[:255], project.name))
>> -            return SeriesReference.objects.filter(
>> -                msgid=ref[:255], series__project=project).first().series
>>  
>>  
>>  def _find_series_by_markers(project, mail, author):
>> @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None):
>>          except IntegrityError:
>>              raise DuplicateMailError(msgid=msgid)
>>  
>> -        # if we don't have a series marker, we will never have an existing
>> -        # series to match against.
>> -        series = None
>> -        if n:
>> -            series = find_series(project, mail, author)
>> +        for attempt in range(10):  # arbitrary retry count
>> +            try:
>> +                with transaction.atomic():
>> +                    # if we don't have a series marker, we will never have an
>> +                    # existing series to match against.
>> +                    series = None
>> +                    if n:
>> +                        series = find_series(project, mail, author)
>> +                    else:
>> +                        x = n = 1
>> +
>> +                    # We will create a new series if:
>> +                    # - there is no existing series to assign this patch to, or
>> +                    # - there is an existing series, but it already has a patch
>> +                    #   with this number in it
>> +                    if not series or Patch.objects.filter(
>> +                            series=series, number=x).count():
>> +                        series = Series.objects.create(
>> +                            project=project,
>> +                            date=date,
>> +                            submitter=author,
>> +                            version=version,
>> +                            total=n)
>> +
>> +                        # NOTE(stephenfin) We must save references for series.
>> +                        # We do this to handle the case where a later patch is
>> +                        # received first. Without storing references, it would
>> +                        # not be possible to identify the relationship between
>> +                        # patches as the earlier patch does not reference the
>> +                        # later one.
>> +                        for ref in refs + [msgid]:
>> +                            ref = ref[:255]
>> +                            # we don't want duplicates
>> +                            try:
>> +                                # we could have a ref to a previous series.
>> +                                # (For example, a series sent in reply to
>> +                                # another series.) That should not create a
>> +                                # series ref for this series, so check for the
>> +                                # msg-id only, not the msg-id/series pair.
>> +                                SeriesReference.objects.get(
>> +                                    msgid=ref, project=project)
>> +                            except SeriesReference.DoesNotExist:
>> +                                SeriesReference.objects.create(
>> +                                    msgid=ref, project=project, series=series)
>> +                    break
>> +            except IntegrityError:
>> +                # we lost the race so go again
>> +                logger.info('Conflict while saving series. This is '
>> +                            'probably because multiple patches belonging '
>> +                            'to the same series have been received at '
>> +                            'once. Trying again (attempt %02d/10)',
>> +                            attempt)
>>          else:
>> -            x = n = 1
>> -
>> -        # We will create a new series if:
>> -        # - there is no existing series to assign this patch to, or
>> -        # - there is an existing series, but it already has a patch with this
>> -        #   number in it
>> -        if not series or Patch.objects.filter(series=series, number=x).count():
>> -            series = Series.objects.create(
>> -                project=project,
>> -                date=date,
>> -                submitter=author,
>> -                version=version,
>> -                total=n)
>> -
>> -            # NOTE(stephenfin) We must save references for series. We
>> -            # do this to handle the case where a later patch is
>> -            # received first. Without storing references, it would not
>> -            # be possible to identify the relationship between patches
>> -            # as the earlier patch does not reference the later one.
>> -            for ref in refs + [msgid]:
>> -                ref = ref[:255]
>> -                # we don't want duplicates
>> -                try:
>> -                    # we could have a ref to a previous series. (For
>> -                    # example, a series sent in reply to another
>> -                    # series.) That should not create a series ref
>> -                    # for this series, so check for the msg-id only,
>> -                    # not the msg-id/series pair.
>> -                    SeriesReference.objects.get(msgid=ref,
>> -                                                series__project=project)
>> -                except SeriesReference.DoesNotExist:
>> -                    SeriesReference.objects.create(series=series, msgid=ref)
>> -                except SeriesReference.MultipleObjectsReturned:
>> -                    logger.error("Multiple SeriesReferences for %s"
>> -                                 " in project %s!" % (ref, project.name))
>> +            # we failed to save the series so return the series-less patch
>> +            logger.warning('Failed to save series. Your patch has been '
>> +                           'saved but this should not happen. Please '
>> +                           'report this as a bug and include logs')
>> +            return patch
>>  
>>          # add to a series if we have found one, and we have a numbered
>>          # patch. Don't add unnumbered patches (for example diffs sent
>> @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None):
>>              # message
>>              try:
>>                  series = SeriesReference.objects.get(
>> -                    msgid=msgid, series__project=project).series
>> +                    msgid=msgid, project=project).series
>>              except SeriesReference.DoesNotExist:
>>                  series = None
>> -            except SeriesReference.MultipleObjectsReturned:
>> -                logger.error("Multiple SeriesReferences for %s"
>> -                             " in project %s!" % (msgid, project.name))
>> -                series = SeriesReference.objects.filter(
>> -                    msgid=msgid, series__project=project).first().series
>>  
>>              if not series:
>>                  series = Series.objects.create(
>> @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None):
>>                  # we don't save the in-reply-to or references fields
>>                  # for a cover letter, as they can't refer to the same
>>                  # series
>> -                try:
>> -                    SeriesReference.objects.get_or_create(series=series,
>> -                                                          msgid=msgid)
>> -                except SeriesReference.MultipleObjectsReturned:
>> -                    logger.error("Multiple SeriesReferences for %s"
>> -                                 " in project %s!" % (msgid, project.name))
>> +                SeriesReference.objects.create(
>> +                    msgid=msgid, project=project, series=series)
>>  
>>              try:
>>                  cover_letter = CoverLetter.objects.create(
>> diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
>> index e5a2fca3..a69c64ba 100644
>> --- patchwork/tests/test_parser.py
>> +++ patchwork/tests/test_parser.py
>> @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase):
>>          msgids = [make_msgid()]
>>          project = create_project()
>>          series_v1 = create_series(project=project)
>> -        create_series_reference(msgid=msgids[0], series=series_v1)
>> +        create_series_reference(msgid=msgids[0], series=series_v1,
>> +                                project=project)
>>  
>>          # ...and three patches
>>          for i in range(3):
>>              msgids.append(make_msgid())
>> -            create_series_reference(msgid=msgids[-1], series=series_v1)
>> +            create_series_reference(msgid=msgids[-1], series=series_v1,
>> +                                    project=project)
>>  
>>          # now create a new series with "cover letter"
>>          msgids.append(make_msgid())
>>          series_v2 = create_series(project=project)
>> -        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
>> +        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
>> +                                         project=project)
>>  
>>          # ...and the "first patch" of this new series
>>          msgid = make_msgid()
>> diff --git patchwork/tests/utils.py patchwork/tests/utils.py
>> index 577183d0..4ead1781 100644
>> --- patchwork/tests/utils.py
>> +++ patchwork/tests/utils.py
>> @@ -282,8 +282,12 @@ def create_series(**kwargs):
>>  
>>  def create_series_reference(**kwargs):
>>      """Create 'SeriesReference' object."""
>> +    project = kwargs.pop('project', create_project())
>> +    series = kwargs.pop('series', create_series(project=project))
>> +
>>      values = {
>> -        'series': create_series() if 'series' not in kwargs else None,
>> +        'series': series,
>> +        'project': project,
>>          'msgid': make_msgid(),
>>      }
>>      values.update(**kwargs)
>> -- 
>> 2.21.0
diff mbox series

Patch

diff --git patchwork/migrations/0037_python_3.py patchwork/migrations/0037_python_3.py
new file mode 100644
index 00000000..416a7045
--- /dev/null
+++ patchwork/migrations/0037_python_3.py
@@ -0,0 +1,298 @@ 
+import datetime
+
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+
+import patchwork.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [('patchwork', '0036_project_commit_url_format')]
+
+    operations = [
+        migrations.AlterField(
+            model_name='check',
+            name='context',
+            field=models.SlugField(
+                default='default',
+                help_text='A label to discern check from checks of other testing systems.',  # noqa
+                max_length=255,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='check',
+            name='description',
+            field=models.TextField(
+                blank=True,
+                help_text='A brief description of the check.',
+                null=True,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='check',
+            name='state',
+            field=models.SmallIntegerField(
+                choices=[
+                    (0, 'pending'),
+                    (1, 'success'),
+                    (2, 'warning'),
+                    (3, 'fail'),
+                ],
+                default=0,
+                help_text='The state of the check.',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='check',
+            name='target_url',
+            field=models.URLField(
+                blank=True,
+                help_text='The target URL to associate with this check. This should be specific to the patch.',  # noqa
+                null=True,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='comment',
+            name='submission',
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='comments',
+                related_query_name='comment',
+                to='patchwork.Submission',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='delegationrule',
+            name='path',
+            field=models.CharField(
+                help_text='An fnmatch-style pattern to match filenames against.',  # noqa
+                max_length=255,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='delegationrule',
+            name='priority',
+            field=models.IntegerField(
+                default=0,
+                help_text='The priority of the rule. Rules with a higher priority will override rules with lower priorities',  # noqa
+            ),
+        ),
+        migrations.AlterField(
+            model_name='delegationrule',
+            name='user',
+            field=models.ForeignKey(
+                help_text='A user to delegate the patch to.',
+                on_delete=django.db.models.deletion.CASCADE,
+                to=settings.AUTH_USER_MODEL,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='emailconfirmation',
+            name='type',
+            field=models.CharField(
+                choices=[
+                    ('userperson', 'User-Person association'),
+                    ('registration', 'Registration'),
+                    ('optout', 'Email opt-out'),
+                ],
+                max_length=20,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='category',
+            field=models.CharField(
+                choices=[
+                    ('cover-created', 'Cover Letter Created'),
+                    ('patch-created', 'Patch Created'),
+                    ('patch-completed', 'Patch Completed'),
+                    ('patch-state-changed', 'Patch State Changed'),
+                    ('patch-delegated', 'Patch Delegate Changed'),
+                    ('check-created', 'Check Created'),
+                    ('series-created', 'Series Created'),
+                    ('series-completed', 'Series Completed'),
+                ],
+                db_index=True,
+                help_text='The category of the event.',
+                max_length=20,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='cover',
+            field=models.ForeignKey(
+                blank=True,
+                help_text='The cover letter that this event was created for.',
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='+',
+                to='patchwork.CoverLetter',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='date',
+            field=models.DateTimeField(
+                default=datetime.datetime.utcnow,
+                help_text='The time this event was created.',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='patch',
+            field=models.ForeignKey(
+                blank=True,
+                help_text='The patch that this event was created for.',
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='+',
+                to='patchwork.Patch',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='project',
+            field=models.ForeignKey(
+                help_text='The project that the events belongs to.',
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='+',
+                to='patchwork.Project',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='event',
+            name='series',
+            field=models.ForeignKey(
+                blank=True,
+                help_text='The series that this event was created for.',
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='+',
+                to='patchwork.Series',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='patch',
+            name='number',
+            field=models.PositiveSmallIntegerField(
+                default=None,
+                help_text='The number assigned to this patch in the series',
+                null=True,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='commit_url_format',
+            field=models.CharField(
+                blank=True,
+                help_text='URL format for a particular commit. {} will be replaced by the commit SHA.',  # noqa
+                max_length=2000,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='list_archive_url_format',
+            field=models.CharField(
+                blank=True,
+                help_text="URL format for the list archive's Message-ID redirector. {} will be replaced by the Message-ID.",  # noqa
+                max_length=2000,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='subject_match',
+            field=models.CharField(
+                blank=True,
+                default='',
+                help_text='Regex to match the subject against if only part of emails sent to the list belongs to this project. Will be used with IGNORECASE and MULTILINE flags. If rules for more projects match the first one returned from DB is chosen; empty field serves as a default for every email which has no other match.',  # noqa
+                max_length=64,
+                validators=[patchwork.models.validate_regex_compiles],
+            ),
+        ),
+        migrations.AlterField(
+            model_name='series',
+            name='name',
+            field=models.CharField(
+                blank=True,
+                help_text='An optional name to associate with the series, e.g. "John\'s PCI series".',  # noqa
+                max_length=255,
+                null=True,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='series',
+            name='total',
+            field=models.IntegerField(
+                help_text='Number of patches in series as indicated by the subject prefix(es)'  # noqa
+            ),
+        ),
+        migrations.AlterField(
+            model_name='series',
+            name='version',
+            field=models.IntegerField(
+                default=1,
+                help_text='Version of series as indicated by the subject prefix(es)',  # noqa
+            ),
+        ),
+        migrations.AlterField(
+            model_name='seriesreference',
+            name='series',
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='references',
+                related_query_name='reference',
+                to='patchwork.Series',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='abbrev',
+            field=models.CharField(
+                help_text='Short (one-or-two letter) abbreviation for the tag, used in table column headers',  # noqa
+                max_length=2,
+                unique=True,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='pattern',
+            field=models.CharField(
+                help_text='A simple regex to match the tag in the content of a message. Will be used with MULTILINE and IGNORECASE flags. eg. ^Acked-by:',  # noqa
+                max_length=50,
+                validators=[patchwork.models.validate_regex_compiles],
+            ),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='show_column',
+            field=models.BooleanField(
+                default=True,
+                help_text="Show a column displaying this tag's count in the patch list view",  # noqa
+            ),
+        ),
+        migrations.AlterField(
+            model_name='userprofile',
+            name='items_per_page',
+            field=models.PositiveIntegerField(
+                default=100, help_text='Number of items to display per page'
+            ),
+        ),
+        migrations.AlterField(
+            model_name='userprofile',
+            name='send_email',
+            field=models.BooleanField(
+                default=False,
+                help_text='Selecting this option allows patchwork to send email on your behalf',  # noqa
+            ),
+        ),
+        migrations.AlterField(
+            model_name='userprofile',
+            name='show_ids',
+            field=models.BooleanField(
+                default=False,
+                help_text='Show click-to-copy patch IDs in the list view',
+            ),
+        ),
+    ]
diff --git patchwork/migrations/0038_unique_series_references.py patchwork/migrations/0038_unique_series_references.py
new file mode 100644
index 00000000..c5517ada
--- /dev/null
+++ patchwork/migrations/0038_unique_series_references.py
@@ -0,0 +1,121 @@ 
+from django.db import connection, migrations, models
+from django.db.models import Count
+import django.db.models.deletion
+
+
+def merge_duplicate_series(apps, schema_editor):
+    SeriesReference = apps.get_model('patchwork', 'SeriesReference')
+    Patch = apps.get_model('patchwork', 'Patch')
+
+    msgid_seriesrefs = {}
+
+    # find all SeriesReference that share a msgid but point to different series
+    # and decide which of the series is going to be the authoritative one
+    msgid_counts = (
+        SeriesReference.objects.values('msgid')
+        .annotate(count=Count('msgid'))
+        .filter(count__gt=1)
+    )
+    for msgid_count in msgid_counts:
+        msgid = msgid_count['msgid']
+        chosen_ref = None
+        for series_ref in SeriesReference.objects.filter(msgid=msgid):
+            if series_ref.series.cover_letter:
+                if chosen_ref:
+                    # I don't think this can happen, but explode if it does
+                    raise Exception(
+                        "Looks like you've got two or more series that share "
+                        "some patches but do not share a cover letter. Unable "
+                        "to auto-resolve."
+                    )
+
+                # if a series has a cover letter, that's the one we'll group
+                # everything under
+                chosen_ref = series_ref
+
+        if not chosen_ref:
+            # if none of the series have cover letters, simply use the last
+            # one (hint: this relies on Python's weird scoping for for loops
+            # where 'series_ref' is still accessible outside the loop)
+            chosen_ref = series_ref
+
+        msgid_seriesrefs[msgid] = chosen_ref
+
+    # reassign any patches referring to non-authoritative series to point to
+    # the authoritative one, and delete the other series; we do this separately
+    # to allow us a chance to raise the exception above if necessary
+    for msgid, chosen_ref in msgid_seriesrefs.items():
+        for series_ref in SeriesReference.objects.filter(msgid=msgid):
+            if series_ref == chosen_ref:
+                continue
+
+            # update the patches to point to our chosen series instead, on the
+            # assumption that all other metadata is correct
+            for patch in Patch.objects.filter(series=series_ref.series):
+                patch.series = chosen_ref.series
+                patch.save()
+
+            # delete the other series (which will delete the series ref)
+            series_ref.series.delete()
+
+
+def copy_project_field(apps, schema_editor):
+    if connection.vendor == 'postgresql':
+        schema_editor.execute(
+            """
+            UPDATE patchwork_seriesreference
+              SET project_id = patchwork_series.project_id
+            FROM patchwork_series
+              WHERE patchwork_seriesreference.series_id = patchwork_series.id
+        """
+        )
+    elif connection.vendor == 'mysql':
+        schema_editor.execute(
+            """
+            UPDATE patchwork_seriesreference, patchwork_series
+              SET patchwork_seriesreference.project_id = patchwork_series.project_id
+            WHERE patchwork_seriesreference.series_id = patchwork_series.id
+        """
+        )  # noqa
+    else:
+        SeriesReference = apps.get_model('patchwork', 'SeriesReference')
+
+        for series_ref in SeriesReference.objects.all().select_related(
+            'series'
+        ):
+            series_ref.project = series_ref.series.project
+            series_ref.save()
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [('patchwork', '0037_python_3')]
+
+    operations = [
+        migrations.RunPython(
+            merge_duplicate_series, migrations.RunPython.noop, atomic=False
+        ),
+        migrations.AddField(
+            model_name='seriesreference',
+            name='project',
+            field=models.ForeignKey(
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                to='patchwork.Project',
+            ),
+        ),
+        migrations.AlterUniqueTogether(
+            name='seriesreference', unique_together={('project', 'msgid')}
+        ),
+        migrations.RunPython(
+            copy_project_field, migrations.RunPython.noop, atomic=False
+        ),
+        migrations.AlterField(
+            model_name='seriesreference',
+            name='project',
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.CASCADE,
+                to='patchwork.Project',
+            ),
+        ),
+    ]
diff --git patchwork/models.py patchwork/models.py
index c198bc2c..e5bfecf8 100644
--- patchwork/models.py
+++ patchwork/models.py
@@ -79,7 +79,8 @@  class Project(models.Model):
     webscm_url = models.CharField(max_length=2000, blank=True)
     list_archive_url = models.CharField(max_length=2000, blank=True)
     list_archive_url_format = models.CharField(
-        max_length=2000, blank=True,
+        max_length=2000,
+        blank=True,
         help_text="URL format for the list archive's Message-ID redirector. "
         "{} will be replaced by the Message-ID.")
     commit_url_format = models.CharField(
@@ -783,6 +784,7 @@  class SeriesReference(models.Model):
     required to handle the case whereby one or more patches are
     received before the cover letter.
     """
+    project = models.ForeignKey(Project, on_delete=models.CASCADE)
     series = models.ForeignKey(Series, related_name='references',
                                related_query_name='reference',
                                on_delete=models.CASCADE)
@@ -792,7 +794,7 @@  class SeriesReference(models.Model):
         return self.msgid
 
     class Meta:
-        unique_together = [('series', 'msgid')]
+        unique_together = [('project', 'msgid')]
 
 
 class Bundle(models.Model):
diff --git patchwork/parser.py patchwork/parser.py
index 7dc66bc0..4b837ae0 100644
--- patchwork/parser.py
+++ patchwork/parser.py
@@ -16,6 +16,7 @@  import re
 
 from django.contrib.auth.models import User
 from django.db.utils import IntegrityError
+from django.db import transaction
 from django.utils import six
 
 from patchwork.models import Comment
@@ -251,16 +252,9 @@  def _find_series_by_references(project, mail):
     for ref in refs:
         try:
             return SeriesReference.objects.get(
-                msgid=ref[:255], series__project=project).series
+                msgid=ref[:255], project=project).series
         except SeriesReference.DoesNotExist:
             continue
-        except SeriesReference.MultipleObjectsReturned:
-            # FIXME: Open bug: this can happen when we're processing
-            # messages in parallel. Pick the first and log.
-            logger.error("Multiple SeriesReferences for %s in project %s!" %
-                         (ref[:255], project.name))
-            return SeriesReference.objects.filter(
-                msgid=ref[:255], series__project=project).first().series
 
 
 def _find_series_by_markers(project, mail, author):
@@ -1029,47 +1023,64 @@  def parse_mail(mail, list_id=None):
         except IntegrityError:
             raise DuplicateMailError(msgid=msgid)
 
-        # if we don't have a series marker, we will never have an existing
-        # series to match against.
-        series = None
-        if n:
-            series = find_series(project, mail, author)
+        for attempt in range(10):  # arbitrary retry count
+            try:
+                with transaction.atomic():
+                    # if we don't have a series marker, we will never have an
+                    # existing series to match against.
+                    series = None
+                    if n:
+                        series = find_series(project, mail, author)
+                    else:
+                        x = n = 1
+
+                    # We will create a new series if:
+                    # - there is no existing series to assign this patch to, or
+                    # - there is an existing series, but it already has a patch
+                    #   with this number in it
+                    if not series or Patch.objects.filter(
+                            series=series, number=x).count():
+                        series = Series.objects.create(
+                            project=project,
+                            date=date,
+                            submitter=author,
+                            version=version,
+                            total=n)
+
+                        # NOTE(stephenfin) We must save references for series.
+                        # We do this to handle the case where a later patch is
+                        # received first. Without storing references, it would
+                        # not be possible to identify the relationship between
+                        # patches as the earlier patch does not reference the
+                        # later one.
+                        for ref in refs + [msgid]:
+                            ref = ref[:255]
+                            # we don't want duplicates
+                            try:
+                                # we could have a ref to a previous series.
+                                # (For example, a series sent in reply to
+                                # another series.) That should not create a
+                                # series ref for this series, so check for the
+                                # msg-id only, not the msg-id/series pair.
+                                SeriesReference.objects.get(
+                                    msgid=ref, project=project)
+                            except SeriesReference.DoesNotExist:
+                                SeriesReference.objects.create(
+                                    msgid=ref, project=project, series=series)
+                    break
+            except IntegrityError:
+                # we lost the race so go again
+                logger.info('Conflict while saving series. This is '
+                            'probably because multiple patches belonging '
+                            'to the same series have been received at '
+                            'once. Trying again (attempt %02d/10)',
+                            attempt)
         else:
-            x = n = 1
-
-        # We will create a new series if:
-        # - there is no existing series to assign this patch to, or
-        # - there is an existing series, but it already has a patch with this
-        #   number in it
-        if not series or Patch.objects.filter(series=series, number=x).count():
-            series = Series.objects.create(
-                project=project,
-                date=date,
-                submitter=author,
-                version=version,
-                total=n)
-
-            # NOTE(stephenfin) We must save references for series. We
-            # do this to handle the case where a later patch is
-            # received first. Without storing references, it would not
-            # be possible to identify the relationship between patches
-            # as the earlier patch does not reference the later one.
-            for ref in refs + [msgid]:
-                ref = ref[:255]
-                # we don't want duplicates
-                try:
-                    # we could have a ref to a previous series. (For
-                    # example, a series sent in reply to another
-                    # series.) That should not create a series ref
-                    # for this series, so check for the msg-id only,
-                    # not the msg-id/series pair.
-                    SeriesReference.objects.get(msgid=ref,
-                                                series__project=project)
-                except SeriesReference.DoesNotExist:
-                    SeriesReference.objects.create(series=series, msgid=ref)
-                except SeriesReference.MultipleObjectsReturned:
-                    logger.error("Multiple SeriesReferences for %s"
-                                 " in project %s!" % (ref, project.name))
+            # we failed to save the series so return the series-less patch
+            logger.warning('Failed to save series. Your patch has been '
+                           'saved but this should not happen. Please '
+                           'report this as a bug and include logs')
+            return patch
 
         # add to a series if we have found one, and we have a numbered
         # patch. Don't add unnumbered patches (for example diffs sent
@@ -1107,14 +1118,9 @@  def parse_mail(mail, list_id=None):
             # message
             try:
                 series = SeriesReference.objects.get(
-                    msgid=msgid, series__project=project).series
+                    msgid=msgid, project=project).series
             except SeriesReference.DoesNotExist:
                 series = None
-            except SeriesReference.MultipleObjectsReturned:
-                logger.error("Multiple SeriesReferences for %s"
-                             " in project %s!" % (msgid, project.name))
-                series = SeriesReference.objects.filter(
-                    msgid=msgid, series__project=project).first().series
 
             if not series:
                 series = Series.objects.create(
@@ -1127,12 +1133,8 @@  def parse_mail(mail, list_id=None):
                 # we don't save the in-reply-to or references fields
                 # for a cover letter, as they can't refer to the same
                 # series
-                try:
-                    SeriesReference.objects.get_or_create(series=series,
-                                                          msgid=msgid)
-                except SeriesReference.MultipleObjectsReturned:
-                    logger.error("Multiple SeriesReferences for %s"
-                                 " in project %s!" % (msgid, project.name))
+                SeriesReference.objects.create(
+                    msgid=msgid, project=project, series=series)
 
             try:
                 cover_letter = CoverLetter.objects.create(
diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
index e5a2fca3..a69c64ba 100644
--- patchwork/tests/test_parser.py
+++ patchwork/tests/test_parser.py
@@ -361,17 +361,20 @@  class SeriesCorrelationTest(TestCase):
         msgids = [make_msgid()]
         project = create_project()
         series_v1 = create_series(project=project)
-        create_series_reference(msgid=msgids[0], series=series_v1)
+        create_series_reference(msgid=msgids[0], series=series_v1,
+                                project=project)
 
         # ...and three patches
         for i in range(3):
             msgids.append(make_msgid())
-            create_series_reference(msgid=msgids[-1], series=series_v1)
+            create_series_reference(msgid=msgids[-1], series=series_v1,
+                                    project=project)
 
         # now create a new series with "cover letter"
         msgids.append(make_msgid())
         series_v2 = create_series(project=project)
-        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
+        ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
+                                         project=project)
 
         # ...and the "first patch" of this new series
         msgid = make_msgid()
diff --git patchwork/tests/utils.py patchwork/tests/utils.py
index 577183d0..4ead1781 100644
--- patchwork/tests/utils.py
+++ patchwork/tests/utils.py
@@ -282,8 +282,12 @@  def create_series(**kwargs):
 
 def create_series_reference(**kwargs):
     """Create 'SeriesReference' object."""
+    project = kwargs.pop('project', create_project())
+    series = kwargs.pop('series', create_series(project=project))
+
     values = {
-        'series': create_series() if 'series' not in kwargs else None,
+        'series': series,
+        'project': project,
         'msgid': make_msgid(),
     }
     values.update(**kwargs)