diff mbox

[08/15] series: Create Series objects when parsing mails

Message ID 1444387202-25735-9-git-send-email-damien.lespiau@intel.com
State Superseded
Headers show

Commit Message

Damien Lespiau Oct. 9, 2015, 10:39 a.m. UTC
This commit only adds basic support for the initial series submission.

Series with or without cover letters are supported. When sent without a
cover letter, the series is named "Untitled series". It's planned to let
the user chose a more appropriate series title through the web UI at a
later point.

Single patches are treated as a Series of 1 patch, named with the
subject of that patch.

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
---
 patchwork/bin/parsemail.py | 86 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 81 insertions(+), 5 deletions(-)

Comments

Stephen Finucane Oct. 9, 2015, 11:21 p.m. UTC | #1
> This commit only adds basic support for the initial series submission.

> 

> Series with or without cover letters are supported. When sent without a

> cover letter, the series is named "Untitled series". It's planned to let

> the user chose a more appropriate series title through the web UI at a

> later point.

> 

> Single patches are treated as a Series of 1 patch, named with the

> subject of that patch.

> 

> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>


Some comments below.

> ---

>  patchwork/bin/parsemail.py | 86

> +++++++++++++++++++++++++++++++++++++++++++---

>  1 file changed, 81 insertions(+), 5 deletions(-)

> 

> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py

> index 8bbb7ee..1e7e083 100755

> --- a/patchwork/bin/parsemail.py

> +++ b/patchwork/bin/parsemail.py

> @@ -31,8 +31,8 @@ from email.utils import parsedate_tz, mktime_tz

>  import logging

> 

>  from patchwork.parser import parse_patch

> -from patchwork.models import Patch, Project, Person, Comment, State, \

> -        get_default_initial_patch_state

> +from patchwork.models import Patch, Project, Person, Comment, State,

> Series, \

> +        SeriesRevision, get_default_initial_patch_state


Same comments as a previous mail - you could use brackets here.

>  import django

>  from django.conf import settings

>  from django.contrib.auth.models import User

> @@ -159,6 +159,9 @@ class MailContent:

>      def __init__(self):

>          self.patch = None

>          self.comment = None

> +        self.series = None

> +        self.revision = None

> +        self.patch_order = 1    # place of the patch in the series

> 

>  def build_references_list(mail):

>      # construct a list of possible reply message ids

> @@ -245,9 +248,36 @@ def find_content(project, mail):

> 

>      ret = MailContent()

> 

> +    (name, prefixes) = clean_subject(mail.get('Subject'),

> [project.linkname])

> +    (x, n) = parse_series_marker(prefixes)

> +    refs = build_references_list(mail)


This code looks familiar - is it a copy of code in another function? If so, let's keep things DRY with another function (I'll look back over patches once I finish the rest of the series and reply with findings).

> +    is_root = refs == []

> +    is_cover_letter = is_root and x == 0

> +

> +    if is_cover_letter or patchbuf:

> +        msgid = mail.get('Message-Id').strip()

> +

> +        # Series get a generic name when they don't start by a cover

> letter or

> +        # when they haven't received the root message yet. Except when

> it's

> +        # only 1 patch, then the series takes the patch subject as name.

> +        series_name = None

> +        if is_cover_letter or n is None:

> +            series_name = strip_prefixes(name)

> +

> +        (ret.series, ret.revision) = find_series_for_mail(project,

> series_name,

> +                                                          msgid, refs)

> +        ret.series.n_patches = n or 1

> +

> +        date = mail_date(mail)

> +        if not ret.series.submitted or date < ret.series.submitted:

> +            ret.series.submitted = date

> +

> +    if is_cover_letter:

> +        ret.revision.cover_letter = clean_content(commentbuf)

> +        return ret

> +

>      if pullurl or patchbuf:

> -        (name, prefixes) = clean_subject(mail.get('Subject'),

> -                                         [project.linkname])

> +        ret.patch_order = x or 1

>          ret.patch = Patch(name = name, pull_url = pullurl, content =

> patchbuf,

>                      date = mail_date(mail), headers = mail_headers(mail))

> 

> @@ -260,7 +290,6 @@ def find_content(project, mail):

>                      headers = mail_headers(mail))

> 

>          else:

> -            refs = build_references_list(mail)

>              cpatch = find_patch_for_comment(project, refs)

>              if not cpatch:

>                  return ret

> @@ -268,8 +297,35 @@ def find_content(project, mail):

>                      content = clean_content(commentbuf),

>                      headers = mail_headers(mail))

> 

> +    # make sure we always have a valid (series,revision) tuple if we have

> a

> +    # patch. We don't consider pull requests a series.

> +    if ret.patch and not pullurl and (not ret.series or not ret.revision):

> +        raise Exception("Could not find series for: %s" % name)

> +

>      return ret

> 

> +# The complexity here is because patches can be received out of order:

> +# If we receive a patch, part of series, before the root message, we

> create a

> +# placeholder series that will be updated once we receive the root

> message.

> +def find_series_for_mail(project, name, msgid, refs):

> +    if refs == []:

> +        root_msgid = msgid

> +    else:

> +        root_msgid = refs[-1]

> +

> +    try:

> +        revision = SeriesRevision.objects.get(root_msgid = root_msgid)

> +        series = revision.series

> +        if name:

> +            series.name = name

> +    except SeriesRevision.DoesNotExist:

> +        if not name:

> +            name = "Untitled series"


Nope. This is a lie as the name of the series is not 'Untitled series': it's None (i.e. nothing). The model needs to be updated to allow 'null' names and this 'Untitled series' string should be generated in the UI.

> +        series = Series(name=name)

> +        revision = SeriesRevision(root_msgid = root_msgid)

> +

> +    return (series, revision)

> +

>  def find_patch_for_comment(project, refs):

>      for ref in refs:

>          patch = None

> @@ -344,6 +400,10 @@ def clean_subject(subject, drop_prefixes = None):

> 

>      return (subject, prefixes)

> 

> +prefixes_re = re.compile('^\[[^\]]*\]\s*')

> +def strip_prefixes(subject):

> +    return prefixes_re.sub('', subject)

> +

>  sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M)

>  def clean_content(str):

>      """ Try to remove signature (-- ) and list footer (_____) cruft """

> @@ -398,6 +458,20 @@ def parse_mail(mail):

>          return 0

>      patch = content.patch

>      comment = content.comment

> +    series = content.series

> +    revision = content.revision

> +

> +    if series:

> +        if save_required:

> +            author.save()

> +            save_required = False

> +        series.project = project

> +        series.submitter = author

> +        series.save()

> +

> +    if revision:

> +        revision.series = series

> +        revision.save()

> 

>      if patch:

>          # we delay the saving until we know we have a patch.

> @@ -411,6 +485,8 @@ def parse_mail(mail):

>          patch.delegate = get_delegate(

>                  mail.get('X-Patchwork-Delegate', '').strip())

>          patch.save()

> +        if revision:

> +            revision.add_patch(patch, content.patch_order)

> 

>      if comment:

>          if save_required:

> --

> 2.1.0

> 

> _______________________________________________

> Patchwork mailing list

> Patchwork@lists.ozlabs.org

> https://lists.ozlabs.org/listinfo/patchwork
Damien Lespiau Oct. 19, 2015, 11:23 a.m. UTC | #2
On Sat, Oct 10, 2015 at 12:21:17AM +0100, Finucane, Stephen wrote:
> > +# The complexity here is because patches can be received out of order:
> > +# If we receive a patch, part of series, before the root message, we
> > create a
> > +# placeholder series that will be updated once we receive the root
> > message.
> > +def find_series_for_mail(project, name, msgid, refs):
> > +    if refs == []:
> > +        root_msgid = msgid
> > +    else:
> > +        root_msgid = refs[-1]
> > +
> > +    try:
> > +        revision = SeriesRevision.objects.get(root_msgid = root_msgid)
> > +        series = revision.series
> > +        if name:
> > +            series.name = name
> > +    except SeriesRevision.DoesNotExist:
> > +        if not name:
> > +            name = "Untitled series"
> 
> Nope. This is a lie as the name of the series is not 'Untitled
> series': it's None (i.e. nothing). The model needs to be updated to
> allow 'null' names and this 'Untitled series' string should be
> generated in the UI.

I went back and forth on this. Making the string part of the client code
instead of having a stronger invariant "all series have a name" make
things harder: special handling in all clients (web site, but also
command line clients), special handling when editing (I have further
work allowing people to edit their series name through the web UI), ...

The real thing I could find to leave the string outside of the DB is for
i18n, but I have no intention to go there.
Stephen Finucane Oct. 20, 2015, 12:33 a.m. UTC | #3
> On Sat, Oct 10, 2015 at 12:21:17AM +0100, Finucane, Stephen wrote:
> > > +# The complexity here is because patches can be received out of order:
> > > +# If we receive a patch, part of series, before the root message, we
> > > create a
> > > +# placeholder series that will be updated once we receive the root
> > > message.
> > > +def find_series_for_mail(project, name, msgid, refs):
> > > +    if refs == []:
> > > +        root_msgid = msgid
> > > +    else:
> > > +        root_msgid = refs[-1]
> > > +
> > > +    try:
> > > +        revision = SeriesRevision.objects.get(root_msgid = root_msgid)
> > > +        series = revision.series
> > > +        if name:
> > > +            series.name = name
> > > +    except SeriesRevision.DoesNotExist:
> > > +        if not name:
> > > +            name = "Untitled series"
> >
> > Nope. This is a lie as the name of the series is not 'Untitled
> > series': it's None (i.e. nothing). The model needs to be updated to
> > allow 'null' names and this 'Untitled series' string should be
> > generated in the UI.
> 
> I went back and forth on this. Making the string part of the client code
> instead of having a stronger invariant "all series have a name" make
> things harder: special handling in all clients (web site, but also
> command line clients), special handling when editing (I have further
> work allowing people to edit their series name through the web UI), ...
> 
> The real thing I could find to leave the string outside of the DB is for
> i18n, but I have no intention to go there.

Hmmm, I understand: it's a tough one to decide on. Just to confirm, we don't use names to uniquely identify series, correct? If so, it would probably be valid to return an return an empty string to the user when using the API? There would be a little modification needed to the web UI but this should be only take a few lines. Referring back to the biggest collection of Python code I know, both OpenStack Nova/Neutron allow for empty instance/network names respectively given that they already have a UUID to identify these elements.

However, all this matters little without something like i18n as you point out. It would be nice-to-have from a DB perspective and a potential future i18n feature but it's not critical. If you'd like to rework this then please do. Otherwise:

Acked-by: Stephen Finucane <stephen.finucane@intel.com>
diff mbox

Patch

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 8bbb7ee..1e7e083 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -31,8 +31,8 @@  from email.utils import parsedate_tz, mktime_tz
 import logging
 
 from patchwork.parser import parse_patch
-from patchwork.models import Patch, Project, Person, Comment, State, \
-        get_default_initial_patch_state
+from patchwork.models import Patch, Project, Person, Comment, State, Series, \
+        SeriesRevision, get_default_initial_patch_state
 import django
 from django.conf import settings
 from django.contrib.auth.models import User
@@ -159,6 +159,9 @@  class MailContent:
     def __init__(self):
         self.patch = None
         self.comment = None
+        self.series = None
+        self.revision = None
+        self.patch_order = 1    # place of the patch in the series
 
 def build_references_list(mail):
     # construct a list of possible reply message ids
@@ -245,9 +248,36 @@  def find_content(project, mail):
 
     ret = MailContent()
 
+    (name, prefixes) = clean_subject(mail.get('Subject'), [project.linkname])
+    (x, n) = parse_series_marker(prefixes)
+    refs = build_references_list(mail)
+    is_root = refs == []
+    is_cover_letter = is_root and x == 0
+
+    if is_cover_letter or patchbuf:
+        msgid = mail.get('Message-Id').strip()
+
+        # Series get a generic name when they don't start by a cover letter or
+        # when they haven't received the root message yet. Except when it's
+        # only 1 patch, then the series takes the patch subject as name.
+        series_name = None
+        if is_cover_letter or n is None:
+            series_name = strip_prefixes(name)
+
+        (ret.series, ret.revision) = find_series_for_mail(project, series_name,
+                                                          msgid, refs)
+        ret.series.n_patches = n or 1
+
+        date = mail_date(mail)
+        if not ret.series.submitted or date < ret.series.submitted:
+            ret.series.submitted = date
+
+    if is_cover_letter:
+        ret.revision.cover_letter = clean_content(commentbuf)
+        return ret
+
     if pullurl or patchbuf:
-        (name, prefixes) = clean_subject(mail.get('Subject'),
-                                         [project.linkname])
+        ret.patch_order = x or 1
         ret.patch = Patch(name = name, pull_url = pullurl, content = patchbuf,
                     date = mail_date(mail), headers = mail_headers(mail))
 
@@ -260,7 +290,6 @@  def find_content(project, mail):
                     headers = mail_headers(mail))
 
         else:
-            refs = build_references_list(mail)
             cpatch = find_patch_for_comment(project, refs)
             if not cpatch:
                 return ret
@@ -268,8 +297,35 @@  def find_content(project, mail):
                     content = clean_content(commentbuf),
                     headers = mail_headers(mail))
 
+    # make sure we always have a valid (series,revision) tuple if we have a
+    # patch. We don't consider pull requests a series.
+    if ret.patch and not pullurl and (not ret.series or not ret.revision):
+        raise Exception("Could not find series for: %s" % name)
+
     return ret
 
+# The complexity here is because patches can be received out of order:
+# If we receive a patch, part of series, before the root message, we create a
+# placeholder series that will be updated once we receive the root message.
+def find_series_for_mail(project, name, msgid, refs):
+    if refs == []:
+        root_msgid = msgid
+    else:
+        root_msgid = refs[-1]
+
+    try:
+        revision = SeriesRevision.objects.get(root_msgid = root_msgid)
+        series = revision.series
+        if name:
+            series.name = name
+    except SeriesRevision.DoesNotExist:
+        if not name:
+            name = "Untitled series"
+        series = Series(name=name)
+        revision = SeriesRevision(root_msgid = root_msgid)
+
+    return (series, revision)
+
 def find_patch_for_comment(project, refs):
     for ref in refs:
         patch = None
@@ -344,6 +400,10 @@  def clean_subject(subject, drop_prefixes = None):
 
     return (subject, prefixes)
 
+prefixes_re = re.compile('^\[[^\]]*\]\s*')
+def strip_prefixes(subject):
+    return prefixes_re.sub('', subject)
+
 sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M)
 def clean_content(str):
     """ Try to remove signature (-- ) and list footer (_____) cruft """
@@ -398,6 +458,20 @@  def parse_mail(mail):
         return 0
     patch = content.patch
     comment = content.comment
+    series = content.series
+    revision = content.revision
+
+    if series:
+        if save_required:
+            author.save()
+            save_required = False
+        series.project = project
+        series.submitter = author
+        series.save()
+
+    if revision:
+        revision.series = series
+        revision.save()
 
     if patch:
         # we delay the saving until we know we have a patch.
@@ -411,6 +485,8 @@  def parse_mail(mail):
         patch.delegate = get_delegate(
                 mail.get('X-Patchwork-Delegate', '').strip())
         patch.save()
+        if revision:
+            revision.add_patch(patch, content.patch_order)
 
     if comment:
         if save_required: