diff mbox series

[RFC,v2,1/1] Rework tagging infrastructure

Message ID 20180419163656.25378-2-vkabatov@redhat.com
State Changes Requested
Headers show
Series Rework tagging infrastructure | expand

Commit Message

Veronika Kabatova April 19, 2018, 4:36 p.m. UTC
From: Veronika Kabatova <vkabatov@redhat.com>

Solve #113 and #57 GitHub issues, fix up returning tags in the API,
keep track of tag origin to later be able to add tags to comments in
the API.

Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
tags for each patch in series.

Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
---
 docs/usage/overview.rst                     |   9 +-
 patchwork/api/cover.py                      |  24 +++-
 patchwork/api/patch.py                      |  35 +++++-
 patchwork/management/commands/retag.py      |  15 ++-
 patchwork/migrations/0026_rework_tagging.py | 123 +++++++++++++++++++
 patchwork/models.py                         | 176 +++++++++++-----------------
 patchwork/templatetags/patch.py             |   3 +-
 patchwork/views/__init__.py                 |   3 -
 patchwork/views/utils.py                    |  16 ++-
 9 files changed, 277 insertions(+), 127 deletions(-)
 create mode 100644 patchwork/migrations/0026_rework_tagging.py

Comments

Stephen Finucane April 25, 2018, 10:13 a.m. UTC | #1
On Thu, 2018-04-19 at 18:36 +0200, vkabatov@redhat.com wrote:
> From: Veronika Kabatova <vkabatov@redhat.com>
> 
> Solve #113 and #57 GitHub issues, fix up returning tags in the API,
> keep track of tag origin to later be able to add tags to comments in
> the API.
> 
> Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
> tags for each patch in series.
> 
> Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>

Comments below. Most of them are little improvements I'd like to see,
but there is one big issue around the difference between SeriesPatch
and Patch that we need to think about.

Stephen

> ---
>  docs/usage/overview.rst                     |   9 +-
>  patchwork/api/cover.py                      |  24 +++-
>  patchwork/api/patch.py                      |  35 +++++-
>  patchwork/management/commands/retag.py      |  15 ++-
>  patchwork/migrations/0026_rework_tagging.py | 123 +++++++++++++++++++
>  patchwork/models.py                         | 176 +++++++++++-----------------
>  patchwork/templatetags/patch.py             |   3 +-
>  patchwork/views/__init__.py                 |   3 -
>  patchwork/views/utils.py                    |  16 ++-
>  9 files changed, 277 insertions(+), 127 deletions(-)
>  create mode 100644 patchwork/migrations/0026_rework_tagging.py
> 
> diff --git a/docs/usage/overview.rst b/docs/usage/overview.rst
> index cc193f3..d7db840 100644
> --- a/docs/usage/overview.rst
> +++ b/docs/usage/overview.rst
> @@ -110,10 +110,11 @@ one delegate can be assigned to a patch.
>  Tags
>  ~~~~
>  
> -Tags are specially formatted metadata appended to the foot the body of a patch
> -or a comment on a patch. Patchwork extracts these tags at parse time and
> -associates them with the patch. You add extra tags to an email by replying to
> -the email. The following tags are available on a standard Patchwork install:
> +Tags are specially formatted metadata appended to the foot the body of a patch,
> +cover letter or a comment related to them. Patchwork extracts these tags at
> +parse time and associates them with patches. You add extra tags to an email by
> +replying to the email. The following tags are available on a standard Patchwork
> +install:
>  
>  Acked-by:
>  
> diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
> index fc7ae97..4d82277 100644
> --- a/patchwork/api/cover.py
> +++ b/patchwork/api/cover.py
> @@ -29,6 +29,7 @@ from patchwork.api.embedded import PersonSerializer
>  from patchwork.api.embedded import ProjectSerializer
>  from patchwork.api.embedded import SeriesSerializer
>  from patchwork.models import CoverLetter
> +from patchwork.models import SubmissionTag
>  
>  
>  class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
> @@ -37,18 +38,36 @@ class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
>      submitter = PersonSerializer(read_only=True)
>      mbox = SerializerMethodField()
>      series = SeriesSerializer(many=True, read_only=True)
> +    tags = SerializerMethodField()
>  
>      def get_mbox(self, instance):
>          request = self.context.get('request')
>          return request.build_absolute_uri(instance.get_mbox_url())
>  
> +    def get_tags(self, instance):
> +        tags = instance.project.tags
> +        if not tags:
> +            return {}
> +
> +        all_tags = {}
> +        related_tags = SubmissionTag.objects.filter(
> +            submission=instance).values_list('tag__name', 'value').distinct()
> +        for tag in tags:
> +            all_tags[tag.name] = [related_tag[1] for related_tag in
> +                                  related_tags if related_tag[0] == tag.name]
> +            # Don't show tags that are not present
> +            if not all_tags[tag.name]:
> +                del(all_tags[tag.name])
> +
> +        return all_tags

This whole thing can be simplified to avoid accessing tags through
'instance.project.tags' property (which will be yet another DB access).
How about:

  def get_tags(self, instance):
      if not instance.project.use_tags:
          return {}

      tags = SubmissionTag.objects.filter(
          submission=instance).values_list('tag__name', 'value').distinct()
      
      result = {}
      for name, value in tags:
          result..setdefault(name, []).append(value)

      return result

This could probably be a property on the 'Tags' object itself too.

> +
>      class Meta:
>          model = CoverLetter
>          fields = ('id', 'url', 'project', 'msgid', 'date', 'name', 'submitter',
> -                  'mbox', 'series')
> +                  'mbox', 'series', 'tags')
>          read_only_fields = fields
>          versioned_fields = {
> -            '1.1': ('mbox', ),
> +            '1.1': ('mbox', 'tags'),
>          }
>          extra_kwargs = {
>              'url': {'view_name': 'api-cover-detail'},
> @@ -68,6 +87,7 @@ class CoverLetterDetailSerializer(CoverLetterListSerializer):
>          fields = CoverLetterListSerializer.Meta.fields + ('headers', 'content')
>          read_only_fields = fields
>          extra_kwargs = CoverLetterListSerializer.Meta.extra_kwargs
> +        versioned_fields = CoverLetterListSerializer.Meta.versioned_fields
>  
>  
>  class CoverLetterList(ListAPIView):
> diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
> index 115feff..295b046 100644
> --- a/patchwork/api/patch.py
> +++ b/patchwork/api/patch.py
> @@ -24,9 +24,9 @@ from rest_framework.generics import ListAPIView
>  from rest_framework.generics import RetrieveUpdateAPIView
>  from rest_framework.relations import RelatedField
>  from rest_framework.reverse import reverse
> -from rest_framework.serializers import HyperlinkedModelSerializer
>  from rest_framework.serializers import SerializerMethodField
>  
> +from patchwork.api.base import BaseHyperlinkedModelSerializer
>  from patchwork.api.base import PatchworkPermission
>  from patchwork.api.filters import PatchFilter
>  from patchwork.api.embedded import PersonSerializer
> @@ -34,7 +34,9 @@ from patchwork.api.embedded import ProjectSerializer
>  from patchwork.api.embedded import SeriesSerializer
>  from patchwork.api.embedded import UserSerializer
>  from patchwork.models import Patch
> +from patchwork.models import SeriesPatch
>  from patchwork.models import State
> +from patchwork.models import SubmissionTag
>  from patchwork.parser import clean_subject
>  
>  
> @@ -75,7 +77,7 @@ class StateField(RelatedField):
>          return State.objects.all()
>  
>  
> -class PatchListSerializer(HyperlinkedModelSerializer):
> +class PatchListSerializer(BaseHyperlinkedModelSerializer):
>  
>      project = ProjectSerializer(read_only=True)
>      state = StateField()
> @@ -92,9 +94,28 @@ class PatchListSerializer(HyperlinkedModelSerializer):
>          return request.build_absolute_uri(instance.get_mbox_url())
>  
>      def get_tags(self, instance):
> -        # TODO(stephenfin): Make tags performant, possibly by reworking the
> -        # model
> -        return {}
> +        tags = instance.project.tags
> +        if not tags:
> +            return {}
> +
> +        sub_ids = [instance.id]
> +        cover = SeriesPatch.objects.get(
> +            patch_id=instance.id).series.cover_letter

Oh, so this is interesting. We've designed patches so that they can be
assigned to multiple series. The idea behind this is to allow us to
create new series where someone submits a single patch of series for a
new revision of the series (i.e. 'PATCH 5/6 v2') without sending the
rest of that patch. I'm still not sure if we're going to add this
feature (it's very tricky to implement) but you can't really use 'get'
here in case we do. I'm not sure how to resolve this...

> +        if cover:
> +            sub_ids.append(cover.id)
> +
> +        all_tags = {}
> +        related_tags = SubmissionTag.objects.filter(
> +            submission__id__in=sub_ids).values_list('tag__name',
> +                                                    'value').distinct()
> +        for tag in tags:
> +            all_tags[tag.name] = [related_tag[1] for related_tag in
> +                                  related_tags if related_tag[0] == tag.name]
> +            # Don't show tags that are not present
> +            if not all_tags[tag.name]:
> +                del(all_tags[tag.name])
> +
> +        return all_tags

As above, this can be simplified somewhat, I suspect.

>  
>      def get_check(self, instance):
>          return instance.combined_check_state
> @@ -115,6 +136,9 @@ class PatchListSerializer(HyperlinkedModelSerializer):
>          extra_kwargs = {
>              'url': {'view_name': 'api-patch-detail'},
>          }
> +        versioned_fields = {
> +            '1.1': ('tags', ),
> +        }
>  
>  
>  class PatchDetailSerializer(PatchListSerializer):
> @@ -136,6 +160,7 @@ class PatchDetailSerializer(PatchListSerializer):
>          read_only_fields = PatchListSerializer.Meta.read_only_fields + (
>              'headers', 'content', 'diff', 'prefixes')
>          extra_kwargs = PatchListSerializer.Meta.extra_kwargs
> +        versioned_fields = PatchListSerializer.Meta.versioned_fields
>  
>  
>  class PatchList(ListAPIView):
> diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
> index 8617ff4..db40256 100644
> --- a/patchwork/management/commands/retag.py
> +++ b/patchwork/management/commands/retag.py
> @@ -19,11 +19,12 @@
>  
>  from django.core.management.base import BaseCommand
>  
> +from patchwork.models import Cover
>  from patchwork.models import Patch
>  
>  
>  class Command(BaseCommand):
> -    help = 'Update the tag (Ack/Review/Test) counts on existing patches'
> +    help = 'Update tags on existing patches'
>      args = '[<patch_id>...]'
>  
>      def handle(self, *args, **options):
> @@ -37,7 +38,17 @@ class Command(BaseCommand):
>          count = query.count()
>  
>          for i, patch in enumerate(query.iterator()):
> -            patch.refresh_tag_counts()
> +            patch.refresh_tags()
> +            for comment in patch.comments.all():
> +                comment.refresh_tags()
> +
> +            cover = SeriesPatch.objects.get(
> +                patch_id=patch.id).series.cover_letter
> +            if cover:
> +                cover.refresh_tags()
> +                for comment in cover.comments.all():
> +                    comment.refresh_tags()
> +
>              if (i % 10) == 0:
>                  self.stdout.write('%06d/%06d\r' % (i, count), ending='')
>                  self.stdout.flush()
> diff --git a/patchwork/migrations/0026_rework_tagging.py b/patchwork/migrations/0026_rework_tagging.py
> new file mode 100644
> index 0000000..88ddcac
> --- /dev/null
> +++ b/patchwork/migrations/0026_rework_tagging.py
> @@ -0,0 +1,123 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +import django.db.models.deletion
> +
> +import re
> +
> +
> +# Django migrations don't allow us to call models' methods because the
> +# migration will break if the methods change. Therefore we need to use an
> +# altered copy of all the code needed.
> +def extract_tags(extract_from, tags):
> +    found_tags = {}
> +
> +    if not extract_from.content:
> +        return found_tags
> +
> +    for tag in tags:
> +        regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
> +        found_tags[tag] = regex.findall(extract_from.content)
> +
> +    return found_tags
> +
> +
> +def save_tag_values(apps, submission, tag, values, comment_origin=None):
> +    if not values:
> +        # We don't need to delete tags since none exist yet and we can't
> +        # delete comments etc. during the migration
> +        return
> +
> +    SubmissionTag = apps.get_model('patchwork', 'SubmissionTag')
> +    SubmissionTag.objects.bulk_create([SubmissionTag(
> +        submission=submission,
> +        tag=tag,
> +        value=value,
> +        from_comment=comment_origin
> +    ) for value in values])
> +
> +
> +def create_all(apps, schema_editor):
> +    Tag = apps.get_model('patchwork', 'Tag')
> +    tags = Tag.objects.all()
> +
> +    Submission = apps.get_model('patchwork', 'Submission')
> +    for submission in Submission.objects.all():
> +        extracted = extract_tags(submission, tags)
> +        for tag in extracted:
> +            save_tag_values(apps, submission, tag, extracted[tag])
> +
> +    Comment = apps.get_model('patchwork', 'Comment')
> +    for comment in Comment.objects.all():
> +        extracted = extract_tags(comment, tags)
> +        for tag in extracted:
> +            save_tag_values(apps,
> +                            comment.submission,
> +                            tag,
> +                            extracted[tag],
> +                            comment_origin=comment.id)
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0025_add_regex_validators'),
> +    ]
> +
> +    operations = [
> +        migrations.CreateModel(
> +            name='SubmissionTag',
> +            fields=[
> +                ('id', models.AutoField(auto_created=True,
> +                                        primary_key=True,
> +                                        serialize=False,
> +                                        verbose_name='ID')),
> +                ('value', models.CharField(max_length=255)),
> +                ('from_comment', models.IntegerField(null=True)),
> +                ('submission', models.ForeignKey(
> +                    on_delete=django.db.models.deletion.CASCADE,
> +                    to='patchwork.Submission'
> +                )),
> +                ('tag', models.ForeignKey(
> +                    on_delete=django.db.models.deletion.CASCADE,
> +                    to='patchwork.Tag'
> +                )),
> +            ],
> +        ),
> +        migrations.AlterUniqueTogether(
> +            name='patchtag',
> +            unique_together=set([]),
> +        ),
> +        migrations.RemoveField(
> +            model_name='patchtag',
> +            name='patch',
> +        ),
> +        migrations.RemoveField(
> +            model_name='patchtag',
> +            name='tag',
> +        ),
> +        migrations.RemoveField(
> +            model_name='patch',
> +            name='tags',
> +        ),
> +        migrations.DeleteModel(
> +            name='PatchTag',
> +        ),
> +        migrations.AddField(
> +            model_name='submission',
> +            name='related_tags',
> +            field=models.ManyToManyField(
> +                through='patchwork.SubmissionTag',
> +                to='patchwork.Tag'
> +            ),
> +        ),
> +        migrations.AlterUniqueTogether(
> +            name='submissiontag',
> +            unique_together=set([('submission',
> +                                  'tag',
> +                                  'value',
> +                                  'from_comment')]),
> +        ),
> +        migrations.RunPython(create_all, atomic=False),
> +    ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index f91b994..84447cc 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -20,8 +20,6 @@
>  
>  from __future__ import absolute_import
>  
> -from collections import Counter
> -from collections import OrderedDict
>  import datetime
>  import random
>  import re
> @@ -252,10 +250,6 @@ class Tag(models.Model):
>                                        ' tag\'s count in the patch list view',
>                                        default=True)
>  
> -    @property
> -    def attr_name(self):
> -        return 'tag_%d_count' % self.id
> -
>      def __str__(self):
>          return self.name
>  
> @@ -263,64 +257,20 @@ class Tag(models.Model):
>          ordering = ['abbrev']
>  
>  
> -class PatchTag(models.Model):
> -    patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
> -    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
> -    count = models.IntegerField(default=1)
> +class SubmissionTag(models.Model):
> +    submission = models.ForeignKey('Submission')
> +    tag = models.ForeignKey('Tag')
> +    value = models.CharField(max_length=255)
> +    from_comment = models.IntegerField(null=True)

Rather than storing the ID this way, couldn't you use a nullable
ForeignKey and rename the field 'comment'?

In addition, I think I mentioned this before but could we store
'series' here too? That would mean we could query all tags for a patch
and its series, avoiding having to jump from patch -> series -> cover
letter (which is going to be slow).

>  
>      class Meta:
> -        unique_together = [('patch', 'tag')]
> +        unique_together = [('submission', 'tag', 'value', 'from_comment')]
>  
>  
>  def get_default_initial_patch_state():
>      return State.objects.get(ordering=0)
>  
>  
> -class PatchQuerySet(models.query.QuerySet):
> -
> -    def with_tag_counts(self, project=None):
> -        if project and not project.use_tags:
> -            return self
> -
> -        # We need the project's use_tags field loaded for Project.tags().
> -        # Using prefetch_related means we'll share the one instance of
> -        # Project, and share the project.tags cache between all patch.project
> -        # references.
> -        qs = self.prefetch_related('project')
> -        select = OrderedDict()
> -        select_params = []
> -
> -        # All projects have the same tags, so we're good to go here
> -        if project:
> -            tags = project.tags
> -        else:
> -            tags = Tag.objects.all()
> -
> -        for tag in tags:
> -            select[tag.attr_name] = (
> -                "coalesce("
> -                "(SELECT count FROM patchwork_patchtag"
> -                " WHERE patchwork_patchtag.patch_id="
> -                "patchwork_patch.submission_ptr_id"
> -                " AND patchwork_patchtag.tag_id=%s), 0)")
> -            select_params.append(tag.id)
> -
> -        return qs.extra(select=select, select_params=select_params)
> -
> -
> -class PatchManager(models.Manager):
> -    use_for_related_fields = True
> -    # NOTE(stephenfin): This is necessary to silence a warning with Django >=
> -    # 1.10. Remove when 1.10 is the minimum supported version.
> -    silence_use_for_related_fields_deprecation = True
> -
> -    def get_queryset(self):
> -        return PatchQuerySet(self.model, using=self.db)
> -
> -    def with_tag_counts(self, project):
> -        return self.get_queryset().with_tag_counts(project)
> -
> -

Happy to see these things going away.

>  class EmailMixin(models.Model):
>      """Mixin for models with an email-origin."""
>      # email metadata
> @@ -334,17 +284,16 @@ class EmailMixin(models.Model):
>      submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
>      content = models.TextField(null=True, blank=True)
>  
> -    response_re = re.compile(
> -        r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
> -        re.M | re.I)
> +    def _extract_tags(self, tags):
> +        found_tags = {}
>  
> -    @property
> -    def patch_responses(self):
>          if not self.content:
> -            return ''
> +            return found_tags
>  
> -        return ''.join([match.group(0) + '\n' for match in
> -                        self.response_re.finditer(self.content)])
> +        for tag in tags:
> +            regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
> +            found_tags[tag] = regex.findall(self.content)
> +        return found_tags

I was trying to figure out a way to use regex groups so we could search
for all tags at once. However, I don't think this is possible and it's
probably not worth the effort trying to figure out.

>  
>      def save(self, *args, **kwargs):
>          # Modifying a submission via admin interface changes '\n' newlines in
> @@ -353,6 +302,7 @@ class EmailMixin(models.Model):
>          # on PY2
>          self.content = self.content.replace('\r\n', '\n')
>          super(EmailMixin, self).save(*args, **kwargs)
> +        self.refresh_tags()

As an aside, I wonder if we want to do this here rather than at parse
time (patchwork/parser.py)? It seems like an expensive operation and we
don't currently allow anyone to modify the bodies of either submissions
or comments.

>      class Meta:
>          abstract = True
> @@ -377,6 +327,34 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
>      # submission metadata
>  
>      name = models.CharField(max_length=255)
> +    related_tags = models.ManyToManyField(Tag, through=SubmissionTag)
> +
> +    def save_tag_values(self, tag, values, comment_origin=None):

Can you call this 'add_tags'?

> +        current_objs = SubmissionTag.objects.filter(
> +            submission=self,
> +            from_comment=comment_origin,
> +            tag=tag
> +        )
> +
> +        if not values:
> +            current_objs.delete()
> +            return
> +
> +        # In case the origin is modified, delete tags that were removed
> +        current_objs.exclude(value__in=values).delete()
> +
> +        values_to_add = set(values) - set(current_objs.values_list('value'))
> +        SubmissionTag.objects.bulk_create([SubmissionTag(
> +            submission=self,
> +            tag=tag,
> +            value=value,
> +            from_comment=comment_origin
> +        ) for value in values_to_add])
> +
> +    def refresh_tags(self):
> +        submission_tags = self._extract_tags(Tag.objects.all())
> +        for tag in submission_tags:
> +            self.save_tag_values(tag, submission_tags[tag])
>  
>      # patchwork metadata
>  
> @@ -386,6 +364,10 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
>      def __str__(self):
>          return self.name
>  
> +    def delete(self, *args, **kwargs):
> +        SubmissionTag.objects.filter(submission=self).delete()
> +        super(Submission, self).delete(*args, **kwargs)
> +

This will happen automatically if you define 'on_delete=models.CASCADE'
on the ForeignKey in through table (SubmissionTag).

>      class Meta:
>          ordering = ['date']
>          unique_together = [('msgid', 'project')]
> @@ -423,7 +405,6 @@ class Patch(SeriesMixin, Submission):
>      diff = models.TextField(null=True, blank=True)
>      commit_ref = models.CharField(max_length=255, null=True, blank=True)
>      pull_url = models.CharField(max_length=255, null=True, blank=True)
> -    tags = models.ManyToManyField(Tag, through=PatchTag)
>  
>      # patchwork metadata
>  
> @@ -437,40 +418,6 @@ class Patch(SeriesMixin, Submission):
>      # patches in a project without needing to do a JOIN.
>      patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
>  
> -    objects = PatchManager()
> -
> -    @staticmethod
> -    def extract_tags(content, tags):
> -        counts = Counter()
> -
> -        for tag in tags:
> -            regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
> -            counts[tag] = len(regex.findall(content))
> -
> -        return counts
> -
> -    def _set_tag(self, tag, count):
> -        if count == 0:
> -            self.patchtag_set.filter(tag=tag).delete()
> -            return
> -        patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
> -        if patchtag.count != count:
> -            patchtag.count = count
> -            patchtag.save()
> -
> -    def refresh_tag_counts(self):
> -        tags = self.project.tags
> -        counter = Counter()
> -
> -        if self.content:
> -            counter += self.extract_tags(self.content, tags)
> -
> -        for comment in self.comments.all():
> -            counter = counter + self.extract_tags(comment.content, tags)
> -
> -        for tag in tags:
> -            self._set_tag(tag, counter[tag])
> -
>      def save(self, *args, **kwargs):
>          if not hasattr(self, 'state') or not self.state:
>              self.state = get_default_initial_patch_state()
> @@ -480,8 +427,6 @@ class Patch(SeriesMixin, Submission):
>  
>          super(Patch, self).save(**kwargs)
>  
> -        self.refresh_tag_counts()
> -
>      def is_editable(self, user):
>          if not is_authenticated(user):
>              return False
> @@ -492,6 +437,22 @@ class Patch(SeriesMixin, Submission):
>          return self.project.is_editable(user)
>  
>      @property
> +    def all_tags(self):
> +        sub_ids = [self.id]
> +        cover = SeriesPatch.objects.get(patch_id=self.id).series.cover_letter
> +        if cover:
> +            sub_ids.append(cover.id)
> +        related_tags = SubmissionTag.objects.filter(
> +            submission__id__in=sub_ids).values_list('tag__name',
> +                                                    'value').distinct()
> +
> +        patch_tags = {}
> +        for tag in self.project.tags:
> +            patch_tags[tag] = [related_tag[1] for related_tag in related_tags
> +                               if related_tag[0] == tag.name]
> +        return patch_tags
> +

As noted above, adding a 'series' ForeignKey would allow us to simplify
this check somewhat.

    return SeriesPatch.objects.get(Q(submission=self.id) | Q(series=self.series.id))

(the syntax for that could be wrong but you get the idea). However,
this should definitely be tested as performance could be worse (I don't
think it would but I haven't checked).

> +    @property
>      def combined_check_state(self):
>          """Return the combined state for all checks.
>  
> @@ -603,15 +564,16 @@ class Comment(EmailMixin, models.Model):
>                                     related_query_name='comment',
>                                     on_delete=models.CASCADE)
>  
> -    def save(self, *args, **kwargs):
> -        super(Comment, self).save(*args, **kwargs)
> -        if hasattr(self.submission, 'patch'):
> -            self.submission.patch.refresh_tag_counts()
> +    def refresh_tags(self):
> +        comment_tags = self._extract_tags(Tag.objects.all())
> +        for tag in comment_tags:
> +            self.submission.save_tag_values(tag,
> +                                            comment_tags[tag],
> +                                            comment_origin=self.id)

As noted above, I'd prefer to just use a nullable ForeignKey for this.
>  
>      def delete(self, *args, **kwargs):
> +        SubmissionTag.objects.filter(from_comment=self.id).delete()

Also noted above, use 'on_delete' to avoid the need to do this.

>          super(Comment, self).delete(*args, **kwargs)
> -        if hasattr(self.submission, 'patch'):
> -            self.submission.patch.refresh_tag_counts()
>  
>      class Meta:
>          ordering = ['date']
> diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
> index 4350e09..f226dc0 100644
> --- a/patchwork/templatetags/patch.py
> +++ b/patchwork/templatetags/patch.py
> @@ -34,8 +34,9 @@ register = template.Library()
>  def patch_tags(patch):
>      counts = []
>      titles = []
> +    all_tags = patch.all_tags
>      for tag in [t for t in patch.project.tags if t.show_column]:
> -        count = getattr(patch, tag.attr_name)
> +        count = len(all_tags[tag])
>          titles.append('%d %s' % (count, tag.name))
>          if count == 0:
>              counts.append("-")
> diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
> index f8d23a3..8c41df6 100644
> --- a/patchwork/views/__init__.py
> +++ b/patchwork/views/__init__.py
> @@ -272,9 +272,6 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
>      if patches is None:
>          patches = Patch.objects.filter(patch_project=project)
>  
> -    # annotate with tag counts
> -    patches = patches.with_tag_counts(project)
> -
>      patches = context['filters'].apply(patches)
>      if not editable_order:
>          patches = order.apply(patches)
> diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
> index f5ff43c..8a0ce23 100644
> --- a/patchwork/views/utils.py
> +++ b/patchwork/views/utils.py
> @@ -32,6 +32,8 @@ from django.utils import six
>  
>  from patchwork.models import Comment
>  from patchwork.models import Patch
> +from patchwork.models import SeriesPatch
> +from patchwork.models import SubmissionTag
>  from patchwork.models import Series
>  
>  if settings.ENABLE_REST_API:
> @@ -75,9 +77,17 @@ def _submission_to_mbox(submission):
>      else:
>          postscript = ''
>  
> -    # TODO(stephenfin): Make this use the tags infrastructure
> -    for comment in Comment.objects.filter(submission=submission):
> -        body += comment.patch_responses
> +    sub_ids = [submission.id]
> +    if is_patch:
> +        cover = SeriesPatch.objects.get(
> +            patch_id=submission.id).series.cover_letter
> +        if cover:
> +            sub_ids.append(cover.id)
> +
> +    for (tagname, value) in SubmissionTag.objects.filter(
> +            submission__id__in=sub_ids).values_list(
> +                'tag__name', 'value').distinct():
> +        body += '%s: %s\n' % (tagname, value)
>  
>      if postscript:
>          body += '---\n' + postscript + '\n'
Veronika Kabatova April 25, 2018, 10:53 a.m. UTC | #2
----- Original Message -----
> From: "Stephen Finucane" <stephen@that.guru>
> To: vkabatov@redhat.com, patchwork@lists.ozlabs.org
> Sent: Wednesday, April 25, 2018 12:13:40 PM
> Subject: Re: [RFC v2 1/1] Rework tagging infrastructure
> 
> On Thu, 2018-04-19 at 18:36 +0200, vkabatov@redhat.com wrote:
> > From: Veronika Kabatova <vkabatov@redhat.com>
> > 
> > Solve #113 and #57 GitHub issues, fix up returning tags in the API,
> > keep track of tag origin to later be able to add tags to comments in
> > the API.
> > 
> > Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
> > tags for each patch in series.
> > 
> > Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
> 
> Comments below. Most of them are little improvements I'd like to see,
> but there is one big issue around the difference between SeriesPatch
> and Patch that we need to think about.
> 

Thanks for comments! I'll check out and include the small improvements
later, but let's figure out the major things first.

> Stephen
> 
> > ---
> >  docs/usage/overview.rst                     |   9 +-
> >  patchwork/api/cover.py                      |  24 +++-
> >  patchwork/api/patch.py                      |  35 +++++-
> >  patchwork/management/commands/retag.py      |  15 ++-
> >  patchwork/migrations/0026_rework_tagging.py | 123 +++++++++++++++++++
> >  patchwork/models.py                         | 176
> >  +++++++++++-----------------
> >  patchwork/templatetags/patch.py             |   3 +-
> >  patchwork/views/__init__.py                 |   3 -
> >  patchwork/views/utils.py                    |  16 ++-
> >  9 files changed, 277 insertions(+), 127 deletions(-)
> >  create mode 100644 patchwork/migrations/0026_rework_tagging.py
> > 
> > diff --git a/docs/usage/overview.rst b/docs/usage/overview.rst
> > index cc193f3..d7db840 100644
> > --- a/docs/usage/overview.rst
> > +++ b/docs/usage/overview.rst
> > @@ -110,10 +110,11 @@ one delegate can be assigned to a patch.
> >  Tags
> >  ~~~~
> >  
> > -Tags are specially formatted metadata appended to the foot the body of a
> > patch
> > -or a comment on a patch. Patchwork extracts these tags at parse time and
> > -associates them with the patch. You add extra tags to an email by replying
> > to
> > -the email. The following tags are available on a standard Patchwork
> > install:
> > +Tags are specially formatted metadata appended to the foot the body of a
> > patch,
> > +cover letter or a comment related to them. Patchwork extracts these tags
> > at
> > +parse time and associates them with patches. You add extra tags to an
> > email by
> > +replying to the email. The following tags are available on a standard
> > Patchwork
> > +install:
> >  
> >  Acked-by:
> >  
> > diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
> > index fc7ae97..4d82277 100644
> > --- a/patchwork/api/cover.py
> > +++ b/patchwork/api/cover.py
> > @@ -29,6 +29,7 @@ from patchwork.api.embedded import PersonSerializer
> >  from patchwork.api.embedded import ProjectSerializer
> >  from patchwork.api.embedded import SeriesSerializer
> >  from patchwork.models import CoverLetter
> > +from patchwork.models import SubmissionTag
> >  
> >  
> >  class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
> > @@ -37,18 +38,36 @@ class
> > CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
> >      submitter = PersonSerializer(read_only=True)
> >      mbox = SerializerMethodField()
> >      series = SeriesSerializer(many=True, read_only=True)
> > +    tags = SerializerMethodField()
> >  
> >      def get_mbox(self, instance):
> >          request = self.context.get('request')
> >          return request.build_absolute_uri(instance.get_mbox_url())
> >  
> > +    def get_tags(self, instance):
> > +        tags = instance.project.tags
> > +        if not tags:
> > +            return {}
> > +
> > +        all_tags = {}
> > +        related_tags = SubmissionTag.objects.filter(
> > +            submission=instance).values_list('tag__name',
> > 'value').distinct()
> > +        for tag in tags:
> > +            all_tags[tag.name] = [related_tag[1] for related_tag in
> > +                                  related_tags if related_tag[0] ==
> > tag.name]
> > +            # Don't show tags that are not present
> > +            if not all_tags[tag.name]:
> > +                del(all_tags[tag.name])
> > +
> > +        return all_tags
> 
> This whole thing can be simplified to avoid accessing tags through
> 'instance.project.tags' property (which will be yet another DB access).
> How about:
> 
>   def get_tags(self, instance):
>       if not instance.project.use_tags:
>           return {}
> 
>       tags = SubmissionTag.objects.filter(
>           submission=instance).values_list('tag__name', 'value').distinct()
>       
>       result = {}
>       for name, value in tags:
>           result..setdefault(name, []).append(value)
> 
>       return result
> 
> This could probably be a property on the 'Tags' object itself too.
> 
> > +
> >      class Meta:
> >          model = CoverLetter
> >          fields = ('id', 'url', 'project', 'msgid', 'date', 'name',
> >          'submitter',
> > -                  'mbox', 'series')
> > +                  'mbox', 'series', 'tags')
> >          read_only_fields = fields
> >          versioned_fields = {
> > -            '1.1': ('mbox', ),
> > +            '1.1': ('mbox', 'tags'),
> >          }
> >          extra_kwargs = {
> >              'url': {'view_name': 'api-cover-detail'},
> > @@ -68,6 +87,7 @@ class
> > CoverLetterDetailSerializer(CoverLetterListSerializer):
> >          fields = CoverLetterListSerializer.Meta.fields + ('headers',
> >          'content')
> >          read_only_fields = fields
> >          extra_kwargs = CoverLetterListSerializer.Meta.extra_kwargs
> > +        versioned_fields = CoverLetterListSerializer.Meta.versioned_fields
> >  
> >  
> >  class CoverLetterList(ListAPIView):
> > diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
> > index 115feff..295b046 100644
> > --- a/patchwork/api/patch.py
> > +++ b/patchwork/api/patch.py
> > @@ -24,9 +24,9 @@ from rest_framework.generics import ListAPIView
> >  from rest_framework.generics import RetrieveUpdateAPIView
> >  from rest_framework.relations import RelatedField
> >  from rest_framework.reverse import reverse
> > -from rest_framework.serializers import HyperlinkedModelSerializer
> >  from rest_framework.serializers import SerializerMethodField
> >  
> > +from patchwork.api.base import BaseHyperlinkedModelSerializer
> >  from patchwork.api.base import PatchworkPermission
> >  from patchwork.api.filters import PatchFilter
> >  from patchwork.api.embedded import PersonSerializer
> > @@ -34,7 +34,9 @@ from patchwork.api.embedded import ProjectSerializer
> >  from patchwork.api.embedded import SeriesSerializer
> >  from patchwork.api.embedded import UserSerializer
> >  from patchwork.models import Patch
> > +from patchwork.models import SeriesPatch
> >  from patchwork.models import State
> > +from patchwork.models import SubmissionTag
> >  from patchwork.parser import clean_subject
> >  
> >  
> > @@ -75,7 +77,7 @@ class StateField(RelatedField):
> >          return State.objects.all()
> >  
> >  
> > -class PatchListSerializer(HyperlinkedModelSerializer):
> > +class PatchListSerializer(BaseHyperlinkedModelSerializer):
> >  
> >      project = ProjectSerializer(read_only=True)
> >      state = StateField()
> > @@ -92,9 +94,28 @@ class PatchListSerializer(HyperlinkedModelSerializer):
> >          return request.build_absolute_uri(instance.get_mbox_url())
> >  
> >      def get_tags(self, instance):
> > -        # TODO(stephenfin): Make tags performant, possibly by reworking
> > the
> > -        # model
> > -        return {}
> > +        tags = instance.project.tags
> > +        if not tags:
> > +            return {}
> > +
> > +        sub_ids = [instance.id]
> > +        cover = SeriesPatch.objects.get(
> > +            patch_id=instance.id).series.cover_letter
> 
> Oh, so this is interesting. We've designed patches so that they can be
> assigned to multiple series. The idea behind this is to allow us to
> create new series where someone submits a single patch of series for a
> new revision of the series (i.e. 'PATCH 5/6 v2') without sending the
> rest of that patch. I'm still not sure if we're going to add this
> feature (it's very tricky to implement) but you can't really use 'get'
> here in case we do. I'm not sure how to resolve this...
> 

Well, I can't use patch.series either because of this many-to-many
relationship. Can we just use this for now and in case somebody decides
to implement the feature, they'll fix all the little things that would
come with it? As you say, implementing that correctly doesn't look easy
at all. Also, do we want to account for nonexistent features?

> > +        if cover:
> > +            sub_ids.append(cover.id)
> > +
> > +        all_tags = {}
> > +        related_tags = SubmissionTag.objects.filter(
> > +            submission__id__in=sub_ids).values_list('tag__name',
> > +                                                    'value').distinct()
> > +        for tag in tags:
> > +            all_tags[tag.name] = [related_tag[1] for related_tag in
> > +                                  related_tags if related_tag[0] ==
> > tag.name]
> > +            # Don't show tags that are not present
> > +            if not all_tags[tag.name]:
> > +                del(all_tags[tag.name])
> > +
> > +        return all_tags
> 
> As above, this can be simplified somewhat, I suspect.
> 
> >  
> >      def get_check(self, instance):
> >          return instance.combined_check_state
> > @@ -115,6 +136,9 @@ class PatchListSerializer(HyperlinkedModelSerializer):
> >          extra_kwargs = {
> >              'url': {'view_name': 'api-patch-detail'},
> >          }
> > +        versioned_fields = {
> > +            '1.1': ('tags', ),
> > +        }
> >  
> >  
> >  class PatchDetailSerializer(PatchListSerializer):
> > @@ -136,6 +160,7 @@ class PatchDetailSerializer(PatchListSerializer):
> >          read_only_fields = PatchListSerializer.Meta.read_only_fields + (
> >              'headers', 'content', 'diff', 'prefixes')
> >          extra_kwargs = PatchListSerializer.Meta.extra_kwargs
> > +        versioned_fields = PatchListSerializer.Meta.versioned_fields
> >  
> >  
> >  class PatchList(ListAPIView):
> > diff --git a/patchwork/management/commands/retag.py
> > b/patchwork/management/commands/retag.py
> > index 8617ff4..db40256 100644
> > --- a/patchwork/management/commands/retag.py
> > +++ b/patchwork/management/commands/retag.py
> > @@ -19,11 +19,12 @@
> >  
> >  from django.core.management.base import BaseCommand
> >  
> > +from patchwork.models import Cover
> >  from patchwork.models import Patch
> >  
> >  
> >  class Command(BaseCommand):
> > -    help = 'Update the tag (Ack/Review/Test) counts on existing patches'
> > +    help = 'Update tags on existing patches'
> >      args = '[<patch_id>...]'
> >  
> >      def handle(self, *args, **options):
> > @@ -37,7 +38,17 @@ class Command(BaseCommand):
> >          count = query.count()
> >  
> >          for i, patch in enumerate(query.iterator()):
> > -            patch.refresh_tag_counts()
> > +            patch.refresh_tags()
> > +            for comment in patch.comments.all():
> > +                comment.refresh_tags()
> > +
> > +            cover = SeriesPatch.objects.get(
> > +                patch_id=patch.id).series.cover_letter
> > +            if cover:
> > +                cover.refresh_tags()
> > +                for comment in cover.comments.all():
> > +                    comment.refresh_tags()
> > +
> >              if (i % 10) == 0:
> >                  self.stdout.write('%06d/%06d\r' % (i, count), ending='')
> >                  self.stdout.flush()
> > diff --git a/patchwork/migrations/0026_rework_tagging.py
> > b/patchwork/migrations/0026_rework_tagging.py
> > new file mode 100644
> > index 0000000..88ddcac
> > --- /dev/null
> > +++ b/patchwork/migrations/0026_rework_tagging.py
> > @@ -0,0 +1,123 @@
> > +# -*- coding: utf-8 -*-
> > +from __future__ import unicode_literals
> > +
> > +from django.db import migrations, models
> > +import django.db.models.deletion
> > +
> > +import re
> > +
> > +
> > +# Django migrations don't allow us to call models' methods because the
> > +# migration will break if the methods change. Therefore we need to use an
> > +# altered copy of all the code needed.
> > +def extract_tags(extract_from, tags):
> > +    found_tags = {}
> > +
> > +    if not extract_from.content:
> > +        return found_tags
> > +
> > +    for tag in tags:
> > +        regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
> > +        found_tags[tag] = regex.findall(extract_from.content)
> > +
> > +    return found_tags
> > +
> > +
> > +def save_tag_values(apps, submission, tag, values, comment_origin=None):
> > +    if not values:
> > +        # We don't need to delete tags since none exist yet and we can't
> > +        # delete comments etc. during the migration
> > +        return
> > +
> > +    SubmissionTag = apps.get_model('patchwork', 'SubmissionTag')
> > +    SubmissionTag.objects.bulk_create([SubmissionTag(
> > +        submission=submission,
> > +        tag=tag,
> > +        value=value,
> > +        from_comment=comment_origin
> > +    ) for value in values])
> > +
> > +
> > +def create_all(apps, schema_editor):
> > +    Tag = apps.get_model('patchwork', 'Tag')
> > +    tags = Tag.objects.all()
> > +
> > +    Submission = apps.get_model('patchwork', 'Submission')
> > +    for submission in Submission.objects.all():
> > +        extracted = extract_tags(submission, tags)
> > +        for tag in extracted:
> > +            save_tag_values(apps, submission, tag, extracted[tag])
> > +
> > +    Comment = apps.get_model('patchwork', 'Comment')
> > +    for comment in Comment.objects.all():
> > +        extracted = extract_tags(comment, tags)
> > +        for tag in extracted:
> > +            save_tag_values(apps,
> > +                            comment.submission,
> > +                            tag,
> > +                            extracted[tag],
> > +                            comment_origin=comment.id)
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [
> > +        ('patchwork', '0025_add_regex_validators'),
> > +    ]
> > +
> > +    operations = [
> > +        migrations.CreateModel(
> > +            name='SubmissionTag',
> > +            fields=[
> > +                ('id', models.AutoField(auto_created=True,
> > +                                        primary_key=True,
> > +                                        serialize=False,
> > +                                        verbose_name='ID')),
> > +                ('value', models.CharField(max_length=255)),
> > +                ('from_comment', models.IntegerField(null=True)),
> > +                ('submission', models.ForeignKey(
> > +                    on_delete=django.db.models.deletion.CASCADE,
> > +                    to='patchwork.Submission'
> > +                )),
> > +                ('tag', models.ForeignKey(
> > +                    on_delete=django.db.models.deletion.CASCADE,
> > +                    to='patchwork.Tag'
> > +                )),
> > +            ],
> > +        ),
> > +        migrations.AlterUniqueTogether(
> > +            name='patchtag',
> > +            unique_together=set([]),
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='patchtag',
> > +            name='patch',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='patchtag',
> > +            name='tag',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='patch',
> > +            name='tags',
> > +        ),
> > +        migrations.DeleteModel(
> > +            name='PatchTag',
> > +        ),
> > +        migrations.AddField(
> > +            model_name='submission',
> > +            name='related_tags',
> > +            field=models.ManyToManyField(
> > +                through='patchwork.SubmissionTag',
> > +                to='patchwork.Tag'
> > +            ),
> > +        ),
> > +        migrations.AlterUniqueTogether(
> > +            name='submissiontag',
> > +            unique_together=set([('submission',
> > +                                  'tag',
> > +                                  'value',
> > +                                  'from_comment')]),
> > +        ),
> > +        migrations.RunPython(create_all, atomic=False),
> > +    ]
> > diff --git a/patchwork/models.py b/patchwork/models.py
> > index f91b994..84447cc 100644
> > --- a/patchwork/models.py
> > +++ b/patchwork/models.py
> > @@ -20,8 +20,6 @@
> >  
> >  from __future__ import absolute_import
> >  
> > -from collections import Counter
> > -from collections import OrderedDict
> >  import datetime
> >  import random
> >  import re
> > @@ -252,10 +250,6 @@ class Tag(models.Model):
> >                                        ' tag\'s count in the patch list
> >                                        view',
> >                                        default=True)
> >  
> > -    @property
> > -    def attr_name(self):
> > -        return 'tag_%d_count' % self.id
> > -
> >      def __str__(self):
> >          return self.name
> >  
> > @@ -263,64 +257,20 @@ class Tag(models.Model):
> >          ordering = ['abbrev']
> >  
> >  
> > -class PatchTag(models.Model):
> > -    patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
> > -    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
> > -    count = models.IntegerField(default=1)
> > +class SubmissionTag(models.Model):
> > +    submission = models.ForeignKey('Submission')
> > +    tag = models.ForeignKey('Tag')
> > +    value = models.CharField(max_length=255)
> > +    from_comment = models.IntegerField(null=True)
> 
> Rather than storing the ID this way, couldn't you use a nullable
> ForeignKey and rename the field 'comment'?
> 
> In addition, I think I mentioned this before but could we store
> 'series' here too? That would mean we could query all tags for a patch
> and its series, avoiding having to jump from patch -> series -> cover
> letter (which is going to be slow).
> 

Just to be clear, by "patch and it's series" you mean only the patch itself
and the cover letter, right? Assigning a tag to patch1 that was intended for
patch2 from the same series isn't what we want to do.

If I implement this change, how do you propose we go around the multiple
series thing since this will be the same situation as with the SeriesPatch
essentially? Or will we go the "ignore for now" route since it's not in the
works now?

> >  
> >      class Meta:
> > -        unique_together = [('patch', 'tag')]
> > +        unique_together = [('submission', 'tag', 'value', 'from_comment')]
> >  
> >  
> >  def get_default_initial_patch_state():
> >      return State.objects.get(ordering=0)
> >  
> >  
> > -class PatchQuerySet(models.query.QuerySet):
> > -
> > -    def with_tag_counts(self, project=None):
> > -        if project and not project.use_tags:
> > -            return self
> > -
> > -        # We need the project's use_tags field loaded for Project.tags().
> > -        # Using prefetch_related means we'll share the one instance of
> > -        # Project, and share the project.tags cache between all
> > patch.project
> > -        # references.
> > -        qs = self.prefetch_related('project')
> > -        select = OrderedDict()
> > -        select_params = []
> > -
> > -        # All projects have the same tags, so we're good to go here
> > -        if project:
> > -            tags = project.tags
> > -        else:
> > -            tags = Tag.objects.all()
> > -
> > -        for tag in tags:
> > -            select[tag.attr_name] = (
> > -                "coalesce("
> > -                "(SELECT count FROM patchwork_patchtag"
> > -                " WHERE patchwork_patchtag.patch_id="
> > -                "patchwork_patch.submission_ptr_id"
> > -                " AND patchwork_patchtag.tag_id=%s), 0)")
> > -            select_params.append(tag.id)
> > -
> > -        return qs.extra(select=select, select_params=select_params)
> > -
> > -
> > -class PatchManager(models.Manager):
> > -    use_for_related_fields = True
> > -    # NOTE(stephenfin): This is necessary to silence a warning with Django
> > >=
> > -    # 1.10. Remove when 1.10 is the minimum supported version.
> > -    silence_use_for_related_fields_deprecation = True
> > -
> > -    def get_queryset(self):
> > -        return PatchQuerySet(self.model, using=self.db)
> > -
> > -    def with_tag_counts(self, project):
> > -        return self.get_queryset().with_tag_counts(project)
> > -
> > -
> 
> Happy to see these things going away.
> 
> >  class EmailMixin(models.Model):
> >      """Mixin for models with an email-origin."""
> >      # email metadata
> > @@ -334,17 +284,16 @@ class EmailMixin(models.Model):
> >      submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
> >      content = models.TextField(null=True, blank=True)
> >  
> > -    response_re = re.compile(
> > -        r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
> > -        re.M | re.I)
> > +    def _extract_tags(self, tags):
> > +        found_tags = {}
> >  
> > -    @property
> > -    def patch_responses(self):
> >          if not self.content:
> > -            return ''
> > +            return found_tags
> >  
> > -        return ''.join([match.group(0) + '\n' for match in
> > -                        self.response_re.finditer(self.content)])
> > +        for tag in tags:
> > +            regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
> > +            found_tags[tag] = regex.findall(self.content)
> > +        return found_tags
> 
> I was trying to figure out a way to use regex groups so we could search
> for all tags at once. However, I don't think this is possible and it's
> probably not worth the effort trying to figure out.
> 
> >  
> >      def save(self, *args, **kwargs):
> >          # Modifying a submission via admin interface changes '\n' newlines
> >          in
> > @@ -353,6 +302,7 @@ class EmailMixin(models.Model):
> >          # on PY2
> >          self.content = self.content.replace('\r\n', '\n')
> >          super(EmailMixin, self).save(*args, **kwargs)
> > +        self.refresh_tags()
> 
> As an aside, I wonder if we want to do this here rather than at parse
> time (patchwork/parser.py)? It seems like an expensive operation and we
> don't currently allow anyone to modify the bodies of either submissions
> or comments.

I can find you some people that for whatever reason need to modify things
via the admin interface. Whether it's intended use case or not, it is
possible to do and I think it important to maintain consistency between
things.

> 
> >      class Meta:
> >          abstract = True
> > @@ -377,6 +327,34 @@ class Submission(FilenameMixin, EmailMixin,
> > models.Model):
> >      # submission metadata
> >  
> >      name = models.CharField(max_length=255)
> > +    related_tags = models.ManyToManyField(Tag, through=SubmissionTag)
> > +
> > +    def save_tag_values(self, tag, values, comment_origin=None):
> 
> Can you call this 'add_tags'?
> 
> > +        current_objs = SubmissionTag.objects.filter(
> > +            submission=self,
> > +            from_comment=comment_origin,
> > +            tag=tag
> > +        )
> > +
> > +        if not values:
> > +            current_objs.delete()
> > +            return
> > +
> > +        # In case the origin is modified, delete tags that were removed
> > +        current_objs.exclude(value__in=values).delete()
> > +
> > +        values_to_add = set(values) -
> > set(current_objs.values_list('value'))
> > +        SubmissionTag.objects.bulk_create([SubmissionTag(
> > +            submission=self,
> > +            tag=tag,
> > +            value=value,
> > +            from_comment=comment_origin
> > +        ) for value in values_to_add])
> > +
> > +    def refresh_tags(self):
> > +        submission_tags = self._extract_tags(Tag.objects.all())
> > +        for tag in submission_tags:
> > +            self.save_tag_values(tag, submission_tags[tag])
> >  
> >      # patchwork metadata
> >  
> > @@ -386,6 +364,10 @@ class Submission(FilenameMixin, EmailMixin,
> > models.Model):
> >      def __str__(self):
> >          return self.name
> >  
> > +    def delete(self, *args, **kwargs):
> > +        SubmissionTag.objects.filter(submission=self).delete()
> > +        super(Submission, self).delete(*args, **kwargs)
> > +
> 
> This will happen automatically if you define 'on_delete=models.CASCADE'
> on the ForeignKey in through table (SubmissionTag).
> 
> >      class Meta:
> >          ordering = ['date']
> >          unique_together = [('msgid', 'project')]
> > @@ -423,7 +405,6 @@ class Patch(SeriesMixin, Submission):
> >      diff = models.TextField(null=True, blank=True)
> >      commit_ref = models.CharField(max_length=255, null=True, blank=True)
> >      pull_url = models.CharField(max_length=255, null=True, blank=True)
> > -    tags = models.ManyToManyField(Tag, through=PatchTag)
> >  
> >      # patchwork metadata
> >  
> > @@ -437,40 +418,6 @@ class Patch(SeriesMixin, Submission):
> >      # patches in a project without needing to do a JOIN.
> >      patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
> >  
> > -    objects = PatchManager()
> > -
> > -    @staticmethod
> > -    def extract_tags(content, tags):
> > -        counts = Counter()
> > -
> > -        for tag in tags:
> > -            regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
> > -            counts[tag] = len(regex.findall(content))
> > -
> > -        return counts
> > -
> > -    def _set_tag(self, tag, count):
> > -        if count == 0:
> > -            self.patchtag_set.filter(tag=tag).delete()
> > -            return
> > -        patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
> > -        if patchtag.count != count:
> > -            patchtag.count = count
> > -            patchtag.save()
> > -
> > -    def refresh_tag_counts(self):
> > -        tags = self.project.tags
> > -        counter = Counter()
> > -
> > -        if self.content:
> > -            counter += self.extract_tags(self.content, tags)
> > -
> > -        for comment in self.comments.all():
> > -            counter = counter + self.extract_tags(comment.content, tags)
> > -
> > -        for tag in tags:
> > -            self._set_tag(tag, counter[tag])
> > -
> >      def save(self, *args, **kwargs):
> >          if not hasattr(self, 'state') or not self.state:
> >              self.state = get_default_initial_patch_state()
> > @@ -480,8 +427,6 @@ class Patch(SeriesMixin, Submission):
> >  
> >          super(Patch, self).save(**kwargs)
> >  
> > -        self.refresh_tag_counts()
> > -
> >      def is_editable(self, user):
> >          if not is_authenticated(user):
> >              return False
> > @@ -492,6 +437,22 @@ class Patch(SeriesMixin, Submission):
> >          return self.project.is_editable(user)
> >  
> >      @property
> > +    def all_tags(self):
> > +        sub_ids = [self.id]
> > +        cover =
> > SeriesPatch.objects.get(patch_id=self.id).series.cover_letter
> > +        if cover:
> > +            sub_ids.append(cover.id)
> > +        related_tags = SubmissionTag.objects.filter(
> > +            submission__id__in=sub_ids).values_list('tag__name',
> > +                                                    'value').distinct()
> > +
> > +        patch_tags = {}
> > +        for tag in self.project.tags:
> > +            patch_tags[tag] = [related_tag[1] for related_tag in
> > related_tags
> > +                               if related_tag[0] == tag.name]
> > +        return patch_tags
> > +
> 
> As noted above, adding a 'series' ForeignKey would allow us to simplify
> this check somewhat.
> 
>     return SeriesPatch.objects.get(Q(submission=self.id) |
>     Q(series=self.series.id))
> 
> (the syntax for that could be wrong but you get the idea). However,
> this should definitely be tested as performance could be worse (I don't
> think it would but I haven't checked).
> 
> > +    @property
> >      def combined_check_state(self):
> >          """Return the combined state for all checks.
> >  
> > @@ -603,15 +564,16 @@ class Comment(EmailMixin, models.Model):
> >                                     related_query_name='comment',
> >                                     on_delete=models.CASCADE)
> >  
> > -    def save(self, *args, **kwargs):
> > -        super(Comment, self).save(*args, **kwargs)
> > -        if hasattr(self.submission, 'patch'):
> > -            self.submission.patch.refresh_tag_counts()
> > +    def refresh_tags(self):
> > +        comment_tags = self._extract_tags(Tag.objects.all())
> > +        for tag in comment_tags:
> > +            self.submission.save_tag_values(tag,
> > +                                            comment_tags[tag],
> > +                                            comment_origin=self.id)
> 
> As noted above, I'd prefer to just use a nullable ForeignKey for this.
> >  
> >      def delete(self, *args, **kwargs):
> > +        SubmissionTag.objects.filter(from_comment=self.id).delete()
> 
> Also noted above, use 'on_delete' to avoid the need to do this.
> 
> >          super(Comment, self).delete(*args, **kwargs)
> > -        if hasattr(self.submission, 'patch'):
> > -            self.submission.patch.refresh_tag_counts()
> >  
> >      class Meta:
> >          ordering = ['date']
> > diff --git a/patchwork/templatetags/patch.py
> > b/patchwork/templatetags/patch.py
> > index 4350e09..f226dc0 100644
> > --- a/patchwork/templatetags/patch.py
> > +++ b/patchwork/templatetags/patch.py
> > @@ -34,8 +34,9 @@ register = template.Library()
> >  def patch_tags(patch):
> >      counts = []
> >      titles = []
> > +    all_tags = patch.all_tags
> >      for tag in [t for t in patch.project.tags if t.show_column]:
> > -        count = getattr(patch, tag.attr_name)
> > +        count = len(all_tags[tag])
> >          titles.append('%d %s' % (count, tag.name))
> >          if count == 0:
> >              counts.append("-")
> > diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
> > index f8d23a3..8c41df6 100644
> > --- a/patchwork/views/__init__.py
> > +++ b/patchwork/views/__init__.py
> > @@ -272,9 +272,6 @@ def generic_list(request, project, view,
> > view_args=None, filter_settings=None,
> >      if patches is None:
> >          patches = Patch.objects.filter(patch_project=project)
> >  
> > -    # annotate with tag counts
> > -    patches = patches.with_tag_counts(project)
> > -
> >      patches = context['filters'].apply(patches)
> >      if not editable_order:
> >          patches = order.apply(patches)
> > diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
> > index f5ff43c..8a0ce23 100644
> > --- a/patchwork/views/utils.py
> > +++ b/patchwork/views/utils.py
> > @@ -32,6 +32,8 @@ from django.utils import six
> >  
> >  from patchwork.models import Comment
> >  from patchwork.models import Patch
> > +from patchwork.models import SeriesPatch
> > +from patchwork.models import SubmissionTag
> >  from patchwork.models import Series
> >  
> >  if settings.ENABLE_REST_API:
> > @@ -75,9 +77,17 @@ def _submission_to_mbox(submission):
> >      else:
> >          postscript = ''
> >  
> > -    # TODO(stephenfin): Make this use the tags infrastructure
> > -    for comment in Comment.objects.filter(submission=submission):
> > -        body += comment.patch_responses
> > +    sub_ids = [submission.id]
> > +    if is_patch:
> > +        cover = SeriesPatch.objects.get(
> > +            patch_id=submission.id).series.cover_letter
> > +        if cover:
> > +            sub_ids.append(cover.id)
> > +
> > +    for (tagname, value) in SubmissionTag.objects.filter(
> > +            submission__id__in=sub_ids).values_list(
> > +                'tag__name', 'value').distinct():
> > +        body += '%s: %s\n' % (tagname, value)
> >  
> >      if postscript:
> >          body += '---\n' + postscript + '\n'
> 
>
diff mbox series

Patch

diff --git a/docs/usage/overview.rst b/docs/usage/overview.rst
index cc193f3..d7db840 100644
--- a/docs/usage/overview.rst
+++ b/docs/usage/overview.rst
@@ -110,10 +110,11 @@  one delegate can be assigned to a patch.
 Tags
 ~~~~
 
-Tags are specially formatted metadata appended to the foot the body of a patch
-or a comment on a patch. Patchwork extracts these tags at parse time and
-associates them with the patch. You add extra tags to an email by replying to
-the email. The following tags are available on a standard Patchwork install:
+Tags are specially formatted metadata appended to the foot the body of a patch,
+cover letter or a comment related to them. Patchwork extracts these tags at
+parse time and associates them with patches. You add extra tags to an email by
+replying to the email. The following tags are available on a standard Patchwork
+install:
 
 Acked-by:
 
diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
index fc7ae97..4d82277 100644
--- a/patchwork/api/cover.py
+++ b/patchwork/api/cover.py
@@ -29,6 +29,7 @@  from patchwork.api.embedded import PersonSerializer
 from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.models import CoverLetter
+from patchwork.models import SubmissionTag
 
 
 class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
@@ -37,18 +38,36 @@  class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
     submitter = PersonSerializer(read_only=True)
     mbox = SerializerMethodField()
     series = SeriesSerializer(many=True, read_only=True)
+    tags = SerializerMethodField()
 
     def get_mbox(self, instance):
         request = self.context.get('request')
         return request.build_absolute_uri(instance.get_mbox_url())
 
+    def get_tags(self, instance):
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        all_tags = {}
+        related_tags = SubmissionTag.objects.filter(
+            submission=instance).values_list('tag__name', 'value').distinct()
+        for tag in tags:
+            all_tags[tag.name] = [related_tag[1] for related_tag in
+                                  related_tags if related_tag[0] == tag.name]
+            # Don't show tags that are not present
+            if not all_tags[tag.name]:
+                del(all_tags[tag.name])
+
+        return all_tags
+
     class Meta:
         model = CoverLetter
         fields = ('id', 'url', 'project', 'msgid', 'date', 'name', 'submitter',
-                  'mbox', 'series')
+                  'mbox', 'series', 'tags')
         read_only_fields = fields
         versioned_fields = {
-            '1.1': ('mbox', ),
+            '1.1': ('mbox', 'tags'),
         }
         extra_kwargs = {
             'url': {'view_name': 'api-cover-detail'},
@@ -68,6 +87,7 @@  class CoverLetterDetailSerializer(CoverLetterListSerializer):
         fields = CoverLetterListSerializer.Meta.fields + ('headers', 'content')
         read_only_fields = fields
         extra_kwargs = CoverLetterListSerializer.Meta.extra_kwargs
+        versioned_fields = CoverLetterListSerializer.Meta.versioned_fields
 
 
 class CoverLetterList(ListAPIView):
diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
index 115feff..295b046 100644
--- a/patchwork/api/patch.py
+++ b/patchwork/api/patch.py
@@ -24,9 +24,9 @@  from rest_framework.generics import ListAPIView
 from rest_framework.generics import RetrieveUpdateAPIView
 from rest_framework.relations import RelatedField
 from rest_framework.reverse import reverse
-from rest_framework.serializers import HyperlinkedModelSerializer
 from rest_framework.serializers import SerializerMethodField
 
+from patchwork.api.base import BaseHyperlinkedModelSerializer
 from patchwork.api.base import PatchworkPermission
 from patchwork.api.filters import PatchFilter
 from patchwork.api.embedded import PersonSerializer
@@ -34,7 +34,9 @@  from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.api.embedded import UserSerializer
 from patchwork.models import Patch
+from patchwork.models import SeriesPatch
 from patchwork.models import State
+from patchwork.models import SubmissionTag
 from patchwork.parser import clean_subject
 
 
@@ -75,7 +77,7 @@  class StateField(RelatedField):
         return State.objects.all()
 
 
-class PatchListSerializer(HyperlinkedModelSerializer):
+class PatchListSerializer(BaseHyperlinkedModelSerializer):
 
     project = ProjectSerializer(read_only=True)
     state = StateField()
@@ -92,9 +94,28 @@  class PatchListSerializer(HyperlinkedModelSerializer):
         return request.build_absolute_uri(instance.get_mbox_url())
 
     def get_tags(self, instance):
-        # TODO(stephenfin): Make tags performant, possibly by reworking the
-        # model
-        return {}
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        sub_ids = [instance.id]
+        cover = SeriesPatch.objects.get(
+            patch_id=instance.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+
+        all_tags = {}
+        related_tags = SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list('tag__name',
+                                                    'value').distinct()
+        for tag in tags:
+            all_tags[tag.name] = [related_tag[1] for related_tag in
+                                  related_tags if related_tag[0] == tag.name]
+            # Don't show tags that are not present
+            if not all_tags[tag.name]:
+                del(all_tags[tag.name])
+
+        return all_tags
 
     def get_check(self, instance):
         return instance.combined_check_state
@@ -115,6 +136,9 @@  class PatchListSerializer(HyperlinkedModelSerializer):
         extra_kwargs = {
             'url': {'view_name': 'api-patch-detail'},
         }
+        versioned_fields = {
+            '1.1': ('tags', ),
+        }
 
 
 class PatchDetailSerializer(PatchListSerializer):
@@ -136,6 +160,7 @@  class PatchDetailSerializer(PatchListSerializer):
         read_only_fields = PatchListSerializer.Meta.read_only_fields + (
             'headers', 'content', 'diff', 'prefixes')
         extra_kwargs = PatchListSerializer.Meta.extra_kwargs
+        versioned_fields = PatchListSerializer.Meta.versioned_fields
 
 
 class PatchList(ListAPIView):
diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
index 8617ff4..db40256 100644
--- a/patchwork/management/commands/retag.py
+++ b/patchwork/management/commands/retag.py
@@ -19,11 +19,12 @@ 
 
 from django.core.management.base import BaseCommand
 
+from patchwork.models import Cover
 from patchwork.models import Patch
 
 
 class Command(BaseCommand):
-    help = 'Update the tag (Ack/Review/Test) counts on existing patches'
+    help = 'Update tags on existing patches'
     args = '[<patch_id>...]'
 
     def handle(self, *args, **options):
@@ -37,7 +38,17 @@  class Command(BaseCommand):
         count = query.count()
 
         for i, patch in enumerate(query.iterator()):
-            patch.refresh_tag_counts()
+            patch.refresh_tags()
+            for comment in patch.comments.all():
+                comment.refresh_tags()
+
+            cover = SeriesPatch.objects.get(
+                patch_id=patch.id).series.cover_letter
+            if cover:
+                cover.refresh_tags()
+                for comment in cover.comments.all():
+                    comment.refresh_tags()
+
             if (i % 10) == 0:
                 self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                 self.stdout.flush()
diff --git a/patchwork/migrations/0026_rework_tagging.py b/patchwork/migrations/0026_rework_tagging.py
new file mode 100644
index 0000000..88ddcac
--- /dev/null
+++ b/patchwork/migrations/0026_rework_tagging.py
@@ -0,0 +1,123 @@ 
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+import re
+
+
+# Django migrations don't allow us to call models' methods because the
+# migration will break if the methods change. Therefore we need to use an
+# altered copy of all the code needed.
+def extract_tags(extract_from, tags):
+    found_tags = {}
+
+    if not extract_from.content:
+        return found_tags
+
+    for tag in tags:
+        regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+        found_tags[tag] = regex.findall(extract_from.content)
+
+    return found_tags
+
+
+def save_tag_values(apps, submission, tag, values, comment_origin=None):
+    if not values:
+        # We don't need to delete tags since none exist yet and we can't
+        # delete comments etc. during the migration
+        return
+
+    SubmissionTag = apps.get_model('patchwork', 'SubmissionTag')
+    SubmissionTag.objects.bulk_create([SubmissionTag(
+        submission=submission,
+        tag=tag,
+        value=value,
+        from_comment=comment_origin
+    ) for value in values])
+
+
+def create_all(apps, schema_editor):
+    Tag = apps.get_model('patchwork', 'Tag')
+    tags = Tag.objects.all()
+
+    Submission = apps.get_model('patchwork', 'Submission')
+    for submission in Submission.objects.all():
+        extracted = extract_tags(submission, tags)
+        for tag in extracted:
+            save_tag_values(apps, submission, tag, extracted[tag])
+
+    Comment = apps.get_model('patchwork', 'Comment')
+    for comment in Comment.objects.all():
+        extracted = extract_tags(comment, tags)
+        for tag in extracted:
+            save_tag_values(apps,
+                            comment.submission,
+                            tag,
+                            extracted[tag],
+                            comment_origin=comment.id)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('patchwork', '0025_add_regex_validators'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='SubmissionTag',
+            fields=[
+                ('id', models.AutoField(auto_created=True,
+                                        primary_key=True,
+                                        serialize=False,
+                                        verbose_name='ID')),
+                ('value', models.CharField(max_length=255)),
+                ('from_comment', models.IntegerField(null=True)),
+                ('submission', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    to='patchwork.Submission'
+                )),
+                ('tag', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    to='patchwork.Tag'
+                )),
+            ],
+        ),
+        migrations.AlterUniqueTogether(
+            name='patchtag',
+            unique_together=set([]),
+        ),
+        migrations.RemoveField(
+            model_name='patchtag',
+            name='patch',
+        ),
+        migrations.RemoveField(
+            model_name='patchtag',
+            name='tag',
+        ),
+        migrations.RemoveField(
+            model_name='patch',
+            name='tags',
+        ),
+        migrations.DeleteModel(
+            name='PatchTag',
+        ),
+        migrations.AddField(
+            model_name='submission',
+            name='related_tags',
+            field=models.ManyToManyField(
+                through='patchwork.SubmissionTag',
+                to='patchwork.Tag'
+            ),
+        ),
+        migrations.AlterUniqueTogether(
+            name='submissiontag',
+            unique_together=set([('submission',
+                                  'tag',
+                                  'value',
+                                  'from_comment')]),
+        ),
+        migrations.RunPython(create_all, atomic=False),
+    ]
diff --git a/patchwork/models.py b/patchwork/models.py
index f91b994..84447cc 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -20,8 +20,6 @@ 
 
 from __future__ import absolute_import
 
-from collections import Counter
-from collections import OrderedDict
 import datetime
 import random
 import re
@@ -252,10 +250,6 @@  class Tag(models.Model):
                                       ' tag\'s count in the patch list view',
                                       default=True)
 
-    @property
-    def attr_name(self):
-        return 'tag_%d_count' % self.id
-
     def __str__(self):
         return self.name
 
@@ -263,64 +257,20 @@  class Tag(models.Model):
         ordering = ['abbrev']
 
 
-class PatchTag(models.Model):
-    patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
-    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
-    count = models.IntegerField(default=1)
+class SubmissionTag(models.Model):
+    submission = models.ForeignKey('Submission')
+    tag = models.ForeignKey('Tag')
+    value = models.CharField(max_length=255)
+    from_comment = models.IntegerField(null=True)
 
     class Meta:
-        unique_together = [('patch', 'tag')]
+        unique_together = [('submission', 'tag', 'value', 'from_comment')]
 
 
 def get_default_initial_patch_state():
     return State.objects.get(ordering=0)
 
 
-class PatchQuerySet(models.query.QuerySet):
-
-    def with_tag_counts(self, project=None):
-        if project and not project.use_tags:
-            return self
-
-        # We need the project's use_tags field loaded for Project.tags().
-        # Using prefetch_related means we'll share the one instance of
-        # Project, and share the project.tags cache between all patch.project
-        # references.
-        qs = self.prefetch_related('project')
-        select = OrderedDict()
-        select_params = []
-
-        # All projects have the same tags, so we're good to go here
-        if project:
-            tags = project.tags
-        else:
-            tags = Tag.objects.all()
-
-        for tag in tags:
-            select[tag.attr_name] = (
-                "coalesce("
-                "(SELECT count FROM patchwork_patchtag"
-                " WHERE patchwork_patchtag.patch_id="
-                "patchwork_patch.submission_ptr_id"
-                " AND patchwork_patchtag.tag_id=%s), 0)")
-            select_params.append(tag.id)
-
-        return qs.extra(select=select, select_params=select_params)
-
-
-class PatchManager(models.Manager):
-    use_for_related_fields = True
-    # NOTE(stephenfin): This is necessary to silence a warning with Django >=
-    # 1.10. Remove when 1.10 is the minimum supported version.
-    silence_use_for_related_fields_deprecation = True
-
-    def get_queryset(self):
-        return PatchQuerySet(self.model, using=self.db)
-
-    def with_tag_counts(self, project):
-        return self.get_queryset().with_tag_counts(project)
-
-
 class EmailMixin(models.Model):
     """Mixin for models with an email-origin."""
     # email metadata
@@ -334,17 +284,16 @@  class EmailMixin(models.Model):
     submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
     content = models.TextField(null=True, blank=True)
 
-    response_re = re.compile(
-        r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
-        re.M | re.I)
+    def _extract_tags(self, tags):
+        found_tags = {}
 
-    @property
-    def patch_responses(self):
         if not self.content:
-            return ''
+            return found_tags
 
-        return ''.join([match.group(0) + '\n' for match in
-                        self.response_re.finditer(self.content)])
+        for tag in tags:
+            regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+            found_tags[tag] = regex.findall(self.content)
+        return found_tags
 
     def save(self, *args, **kwargs):
         # Modifying a submission via admin interface changes '\n' newlines in
@@ -353,6 +302,7 @@  class EmailMixin(models.Model):
         # on PY2
         self.content = self.content.replace('\r\n', '\n')
         super(EmailMixin, self).save(*args, **kwargs)
+        self.refresh_tags()
 
     class Meta:
         abstract = True
@@ -377,6 +327,34 @@  class Submission(FilenameMixin, EmailMixin, models.Model):
     # submission metadata
 
     name = models.CharField(max_length=255)
+    related_tags = models.ManyToManyField(Tag, through=SubmissionTag)
+
+    def save_tag_values(self, tag, values, comment_origin=None):
+        current_objs = SubmissionTag.objects.filter(
+            submission=self,
+            from_comment=comment_origin,
+            tag=tag
+        )
+
+        if not values:
+            current_objs.delete()
+            return
+
+        # In case the origin is modified, delete tags that were removed
+        current_objs.exclude(value__in=values).delete()
+
+        values_to_add = set(values) - set(current_objs.values_list('value'))
+        SubmissionTag.objects.bulk_create([SubmissionTag(
+            submission=self,
+            tag=tag,
+            value=value,
+            from_comment=comment_origin
+        ) for value in values_to_add])
+
+    def refresh_tags(self):
+        submission_tags = self._extract_tags(Tag.objects.all())
+        for tag in submission_tags:
+            self.save_tag_values(tag, submission_tags[tag])
 
     # patchwork metadata
 
@@ -386,6 +364,10 @@  class Submission(FilenameMixin, EmailMixin, models.Model):
     def __str__(self):
         return self.name
 
+    def delete(self, *args, **kwargs):
+        SubmissionTag.objects.filter(submission=self).delete()
+        super(Submission, self).delete(*args, **kwargs)
+
     class Meta:
         ordering = ['date']
         unique_together = [('msgid', 'project')]
@@ -423,7 +405,6 @@  class Patch(SeriesMixin, Submission):
     diff = models.TextField(null=True, blank=True)
     commit_ref = models.CharField(max_length=255, null=True, blank=True)
     pull_url = models.CharField(max_length=255, null=True, blank=True)
-    tags = models.ManyToManyField(Tag, through=PatchTag)
 
     # patchwork metadata
 
@@ -437,40 +418,6 @@  class Patch(SeriesMixin, Submission):
     # patches in a project without needing to do a JOIN.
     patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
 
-    objects = PatchManager()
-
-    @staticmethod
-    def extract_tags(content, tags):
-        counts = Counter()
-
-        for tag in tags:
-            regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
-            counts[tag] = len(regex.findall(content))
-
-        return counts
-
-    def _set_tag(self, tag, count):
-        if count == 0:
-            self.patchtag_set.filter(tag=tag).delete()
-            return
-        patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
-        if patchtag.count != count:
-            patchtag.count = count
-            patchtag.save()
-
-    def refresh_tag_counts(self):
-        tags = self.project.tags
-        counter = Counter()
-
-        if self.content:
-            counter += self.extract_tags(self.content, tags)
-
-        for comment in self.comments.all():
-            counter = counter + self.extract_tags(comment.content, tags)
-
-        for tag in tags:
-            self._set_tag(tag, counter[tag])
-
     def save(self, *args, **kwargs):
         if not hasattr(self, 'state') or not self.state:
             self.state = get_default_initial_patch_state()
@@ -480,8 +427,6 @@  class Patch(SeriesMixin, Submission):
 
         super(Patch, self).save(**kwargs)
 
-        self.refresh_tag_counts()
-
     def is_editable(self, user):
         if not is_authenticated(user):
             return False
@@ -492,6 +437,22 @@  class Patch(SeriesMixin, Submission):
         return self.project.is_editable(user)
 
     @property
+    def all_tags(self):
+        sub_ids = [self.id]
+        cover = SeriesPatch.objects.get(patch_id=self.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+        related_tags = SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list('tag__name',
+                                                    'value').distinct()
+
+        patch_tags = {}
+        for tag in self.project.tags:
+            patch_tags[tag] = [related_tag[1] for related_tag in related_tags
+                               if related_tag[0] == tag.name]
+        return patch_tags
+
+    @property
     def combined_check_state(self):
         """Return the combined state for all checks.
 
@@ -603,15 +564,16 @@  class Comment(EmailMixin, models.Model):
                                    related_query_name='comment',
                                    on_delete=models.CASCADE)
 
-    def save(self, *args, **kwargs):
-        super(Comment, self).save(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
+    def refresh_tags(self):
+        comment_tags = self._extract_tags(Tag.objects.all())
+        for tag in comment_tags:
+            self.submission.save_tag_values(tag,
+                                            comment_tags[tag],
+                                            comment_origin=self.id)
 
     def delete(self, *args, **kwargs):
+        SubmissionTag.objects.filter(from_comment=self.id).delete()
         super(Comment, self).delete(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
 
     class Meta:
         ordering = ['date']
diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
index 4350e09..f226dc0 100644
--- a/patchwork/templatetags/patch.py
+++ b/patchwork/templatetags/patch.py
@@ -34,8 +34,9 @@  register = template.Library()
 def patch_tags(patch):
     counts = []
     titles = []
+    all_tags = patch.all_tags
     for tag in [t for t in patch.project.tags if t.show_column]:
-        count = getattr(patch, tag.attr_name)
+        count = len(all_tags[tag])
         titles.append('%d %s' % (count, tag.name))
         if count == 0:
             counts.append("-")
diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
index f8d23a3..8c41df6 100644
--- a/patchwork/views/__init__.py
+++ b/patchwork/views/__init__.py
@@ -272,9 +272,6 @@  def generic_list(request, project, view, view_args=None, filter_settings=None,
     if patches is None:
         patches = Patch.objects.filter(patch_project=project)
 
-    # annotate with tag counts
-    patches = patches.with_tag_counts(project)
-
     patches = context['filters'].apply(patches)
     if not editable_order:
         patches = order.apply(patches)
diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
index f5ff43c..8a0ce23 100644
--- a/patchwork/views/utils.py
+++ b/patchwork/views/utils.py
@@ -32,6 +32,8 @@  from django.utils import six
 
 from patchwork.models import Comment
 from patchwork.models import Patch
+from patchwork.models import SeriesPatch
+from patchwork.models import SubmissionTag
 from patchwork.models import Series
 
 if settings.ENABLE_REST_API:
@@ -75,9 +77,17 @@  def _submission_to_mbox(submission):
     else:
         postscript = ''
 
-    # TODO(stephenfin): Make this use the tags infrastructure
-    for comment in Comment.objects.filter(submission=submission):
-        body += comment.patch_responses
+    sub_ids = [submission.id]
+    if is_patch:
+        cover = SeriesPatch.objects.get(
+            patch_id=submission.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+
+    for (tagname, value) in SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list(
+                'tag__name', 'value').distinct():
+        body += '%s: %s\n' % (tagname, value)
 
     if postscript:
         body += '---\n' + postscript + '\n'