diff mbox

[05/10] parsemail: Handle series sent "in-reply-to"

Message ID 1465814502-14108-6-git-send-email-stephen.finucane@intel.com
State Superseded
Headers show

Commit Message

Stephen Finucane June 13, 2016, 10:41 a.m. UTC
Take a series, "v2", sent as a reply to another, "v1", like so:

    [PATCH 0/3] A cover letter
      [PATCH 1/3] The first patch
      ...
      [PATCH v2 0/3] A cover letter
        [PATCH v2 1/3] The first patch
        ...

The current behavior for traversing references is oldest first. For
example, for "PATCH v2 1/3" above, the references field will look like
so:

    ["0/3", "v2 0/3", "v2 1/3"]

Where "0/3" corresponds to the message-id of "PATCH 0/3".

By traversing this way, patches sent in-reply-to an existing series
will always be linked to that series, instead of creating a new series,
as expected. Flip things around, and instead attempt to find series
using the most recent message-id first. This will ensure the most
recent series is always used.

Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
---
 patchwork/bin/parsemail.py |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)

Comments

Andy Doan June 23, 2016, 6:24 p.m. UTC | #1
On 06/13/2016 05:41 AM, Stephen Finucane wrote:
> Take a series, "v2", sent as a reply to another, "v1", like so:
>
>      [PATCH 0/3] A cover letter
>        [PATCH 1/3] The first patch
>        ...
>        [PATCH v2 0/3] A cover letter
>          [PATCH v2 1/3] The first patch
>          ...
>
> The current behavior for traversing references is oldest first. For
> example, for "PATCH v2 1/3" above, the references field will look like
> so:
>
>      ["0/3", "v2 0/3", "v2 1/3"]
>
> Where "0/3" corresponds to the message-id of "PATCH 0/3".
>
> By traversing this way, patches sent in-reply-to an existing series
> will always be linked to that series, instead of creating a new series,
> as expected. Flip things around, and instead attempt to find series
> using the most recent message-id first. This will ensure the most
> recent series is always used.
>
> Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
> ---
>   patchwork/bin/parsemail.py |   10 +++++++++-
>   1 files changed, 9 insertions(+), 1 deletions(-)
>
> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> index f52776e..8baacf6 100755
> --- a/patchwork/bin/parsemail.py
> +++ b/patchwork/bin/parsemail.py
> @@ -137,7 +137,7 @@ def find_series(mail):
>       """
>       series = None
>
> -    for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
> +    for ref in [mail.get('Message-ID').strip()] + find_references(mail)[::-1]:
>           # try parsing by RFC5322 fields first
>           try:
>               series_ref = SeriesReference.objects.get(msgid=ref)
> @@ -575,6 +575,10 @@ def parse_mail(mail, list_id=None):
>               series.save()
>
>               for ref in refs + [msgid]:  # save references for series

I think this can be simplified and perhaps be less racy:
> +                # prevent duplication
> +                if SeriesReference.objects.filter(msgid=ref).exists():
> +                    continue
> +
>                   series_ref = SeriesReference(series=series,
>                                                msgid=ref)
>                   series_ref.save()

    try:
        SeriesReference.objects.create(series=series, msgid=ref)
    except IntegrityError:
         pass # already exists

> @@ -621,6 +625,10 @@ def parse_mail(mail, list_id=None):
>                   series.save()
>
>                   for ref in refs + [msgid]:  # save references for series
> +                    # prevent duplication
> +                    if SeriesReference.objects.filter(msgid=ref).exists():
> +                        continue
> +
>                       series_ref = SeriesReference(series=series,
>                                                    msgid=ref)
>                       series_ref.save()
>
Stephen Finucane June 23, 2016, 10 p.m. UTC | #2
On 23 Jun 13:24, Andy Doan wrote:
> On 06/13/2016 05:41 AM, Stephen Finucane wrote:
> >Take a series, "v2", sent as a reply to another, "v1", like so:
> >
> >     [PATCH 0/3] A cover letter
> >       [PATCH 1/3] The first patch
> >       ...
> >       [PATCH v2 0/3] A cover letter
> >         [PATCH v2 1/3] The first patch
> >         ...
> >
> >The current behavior for traversing references is oldest first. For
> >example, for "PATCH v2 1/3" above, the references field will look like
> >so:
> >
> >     ["0/3", "v2 0/3", "v2 1/3"]
> >
> >Where "0/3" corresponds to the message-id of "PATCH 0/3".
> >
> >By traversing this way, patches sent in-reply-to an existing series
> >will always be linked to that series, instead of creating a new series,
> >as expected. Flip things around, and instead attempt to find series
> >using the most recent message-id first. This will ensure the most
> >recent series is always used.
> >
> >Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
> >---
> >  patchwork/bin/parsemail.py |   10 +++++++++-
> >  1 files changed, 9 insertions(+), 1 deletions(-)
> >
> >diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> >index f52776e..8baacf6 100755
> >--- a/patchwork/bin/parsemail.py
> >+++ b/patchwork/bin/parsemail.py
> >@@ -137,7 +137,7 @@ def find_series(mail):
> >      """
> >      series = None
> >
> >-    for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
> >+    for ref in [mail.get('Message-ID').strip()] + find_references(mail)[::-1]:
> >          # try parsing by RFC5322 fields first
> >          try:
> >              series_ref = SeriesReference.objects.get(msgid=ref)
> >@@ -575,6 +575,10 @@ def parse_mail(mail, list_id=None):
> >              series.save()
> >
> >              for ref in refs + [msgid]:  # save references for series
> 
> I think this can be simplified and perhaps be less racy:
> >+                # prevent duplication
> >+                if SeriesReference.objects.filter(msgid=ref).exists():
> >+                    continue
> >+
> >                  series_ref = SeriesReference(series=series,
> >                                               msgid=ref)
> >                  series_ref.save()
> 
>    try:
>        SeriesReference.objects.create(series=series, msgid=ref)
>    except IntegrityError:
>         pass # already exists

Yup, makes sense. I'm also sure there's a get_or_create method
somewhere. Let me try one of those.

> >@@ -621,6 +625,10 @@ def parse_mail(mail, list_id=None):
> >                  series.save()
> >
> >                  for ref in refs + [msgid]:  # save references for series
> >+                    # prevent duplication
> >+                    if SeriesReference.objects.filter(msgid=ref).exists():
> >+                        continue
> >+
> >                      series_ref = SeriesReference(series=series,
> >                                                   msgid=ref)
> >                      series_ref.save()
> >
>
diff mbox

Patch

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index f52776e..8baacf6 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -137,7 +137,7 @@  def find_series(mail):
     """
     series = None
 
-    for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
+    for ref in [mail.get('Message-ID').strip()] + find_references(mail)[::-1]:
         # try parsing by RFC5322 fields first
         try:
             series_ref = SeriesReference.objects.get(msgid=ref)
@@ -575,6 +575,10 @@  def parse_mail(mail, list_id=None):
             series.save()
 
             for ref in refs + [msgid]:  # save references for series
+                # prevent duplication
+                if SeriesReference.objects.filter(msgid=ref).exists():
+                    continue
+
                 series_ref = SeriesReference(series=series,
                                              msgid=ref)
                 series_ref.save()
@@ -621,6 +625,10 @@  def parse_mail(mail, list_id=None):
                 series.save()
 
                 for ref in refs + [msgid]:  # save references for series
+                    # prevent duplication
+                    if SeriesReference.objects.filter(msgid=ref).exists():
+                        continue
+
                     series_ref = SeriesReference(series=series,
                                                  msgid=ref)
                     series_ref.save()