diff mbox series

[v2,4/6] models: Migrate event fields to JSON field

Message ID 20180409210256.19649-5-stephen@that.guru
State Rejected
Headers show
Series Add 'Event.payload' field | expand

Commit Message

Stephen Finucane April 9, 2018, 9:02 p.m. UTC
This allows us to remove swathes of ForeignKeys and greatly speed up the
'/events' endpoint. This comes at the disadvantage of preventing us
indexing the embedded fields and would result in different responses if
the serializers ever change; however, we don't do the former at the
moment and it's unlikely that the latter will happen regularly (and can
be addressed by a migration if absolutely necessary).

Note that to generated the JSON, we use the same serializers we normally
use for embedded responses. This has the advantage of minimizing the
amount of new code needed and handles a lot of the ugliness of
serialization. However, this does necessitate using Django REST
Framework for events.  This has been addressed in a previous patch.

Signed-off-by: Stephen Finucane <stephen@that.guru>
Closes-bug: #153 ("'/events' is slow")
---
Changes since v1:
- Add 'PayloadField', allowing us to separate the work we're doing for
  populating the various URL fields in the payload from everything else
- Fix a bug in the migration that was resulting in escaped JSON being
  written to the 'event.payload' field.
- Update migration numbers per additional changes upstream.
- Random bug fixes and improvements.

Changes since RFC:
- Remove event cleanup from migration, since we're no longer doing that
- Removed unrelated migration data from a migration
---
 patchwork/api/event.py                             | 120 ++++++++++++---------
 patchwork/fields.py                                |  46 ++++++++
 .../migrations/0026_add_event_payload_field.py     |  21 ++++
 ...27_migrate_data_from_event_fields_to_payload.py |  61 +++++++++++
 .../migrations/0028_remove_old_event_fields.py     |  34 ++++++
 patchwork/models.py                                |  28 +----
 patchwork/signals.py                               |  58 ++++++++--
 patchwork/tests/test_events.py                     | 110 ++++++++++++-------
 8 files changed, 356 insertions(+), 122 deletions(-)
 create mode 100644 patchwork/migrations/0026_add_event_payload_field.py
 create mode 100644 patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
 create mode 100644 patchwork/migrations/0028_remove_old_event_fields.py

Comments

Stephen Finucane April 10, 2018, 3:48 p.m. UTC | #1
On Mon, 2018-04-09 at 22:02 +0100, Stephen Finucane wrote:
> This allows us to remove swathes of ForeignKeys and greatly speed up the
> '/events' endpoint. This comes at the disadvantage of preventing us
> indexing the embedded fields and would result in different responses if
> the serializers ever change; however, we don't do the former at the
> moment and it's unlikely that the latter will happen regularly (and can
> be addressed by a migration if absolutely necessary).
> 
> Note that to generated the JSON, we use the same serializers we normally
> use for embedded responses. This has the advantage of minimizing the
> amount of new code needed and handles a lot of the ugliness of
> serialization. However, this does necessitate using Django REST
> Framework for events.  This has been addressed in a previous patch.
> 
> Signed-off-by: Stephen Finucane <stephen@that.guru>
> Closes-bug: #153 ("'/events' is slow")
> ---
> Changes since v1:
> - Add 'PayloadField', allowing us to separate the work we're doing for
>   populating the various URL fields in the payload from everything else
> - Fix a bug in the migration that was resulting in escaped JSON being
>   written to the 'event.payload' field.
> - Update migration numbers per additional changes upstream.
> - Random bug fixes and improvements.
> 
> Changes since RFC:
> - Remove event cleanup from migration, since we're no longer doing that
> - Removed unrelated migration data from a migration
> ---
>  patchwork/api/event.py                             | 120 ++++++++++++---------
>  patchwork/fields.py                                |  46 ++++++++
>  .../migrations/0026_add_event_payload_field.py     |  21 ++++
>  ...27_migrate_data_from_event_fields_to_payload.py |  61 +++++++++++
>  .../migrations/0028_remove_old_event_fields.py     |  34 ++++++
>  patchwork/models.py                                |  28 +----
>  patchwork/signals.py                               |  58 ++++++++--
>  patchwork/tests/test_events.py                     | 110 ++++++++++++-------
>  8 files changed, 356 insertions(+), 122 deletions(-)
>  create mode 100644 patchwork/migrations/0026_add_event_payload_field.py
>  create mode 100644 patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
>  create mode 100644 patchwork/migrations/0028_remove_old_event_fields.py
> 
> diff --git a/patchwork/api/event.py b/patchwork/api/event.py
> index 9879a9f6..482dec1e 100644
> --- a/patchwork/api/event.py
> +++ b/patchwork/api/event.py
> @@ -17,14 +17,15 @@
>  # along with Patchwork; if not, write to the Free Software
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>  
> -from collections import OrderedDict
>  import json
>  
>  from rest_framework.generics import ListAPIView
> +from rest_framework.reverse import reverse
>  from rest_framework.serializers import ModelSerializer
> -from rest_framework.serializers import SerializerMethodField
>  from rest_framework.renderers import JSONRenderer
>  from rest_framework.renderers import TemplateHTMLRenderer
> +from rest_framework.serializers import ReadOnlyField
> +from rest_framework.serializers import Serializer
>  
>  from patchwork.api.embedded import CheckSerializer
>  from patchwork.api.embedded import CoverLetterSerializer
> @@ -33,58 +34,81 @@ from patchwork.api.embedded import ProjectSerializer
>  from patchwork.api.embedded import SeriesSerializer
>  from patchwork.api.embedded import UserSerializer
>  from patchwork.api.filters import EventFilter
> -from patchwork.api.patch import StateField
>  from patchwork.models import Event
>  
>  
> -class EventSerializer(ModelSerializer):
> +class StateField(ReadOnlyField):
>  
> -    project = ProjectSerializer(read_only=True)
> -    patch = PatchSerializer(read_only=True)
> -    series = SeriesSerializer(read_only=True)
> -    cover = CoverLetterSerializer(read_only=True)
> +    def to_representation(self, obj):
> +        return '-'.join(obj.name.lower().split())
> +
> +
> +class PayloadField(ReadOnlyField):
> +
> +    def to_representation(self, value):
> +        request = self.context['request']
> +
> +        # set the 'url' and 'mbox' fields for any embedded 'cover',
> +        # 'patch', and 'series' fields
> +        for field in ('cover', 'patch', 'series'):
> +            if not value.get(field):
> +                continue
> +
> +            for subfield, lookup, url_name in (
> +                    ('url', 'pk', 'api-{}-detail'),
> +                    ('mbox', '{}_id', '{}-mbox')):
> +                value[field][subfield] = reverse(
> +                    url_name.format(field),
> +                    request=request,
> +                    kwargs={
> +                        lookup.format(field): value[field]['id'],
> +                    })
> +
> +        # set the 'url' field for any embedded '*_delegate' fields
> +        for field in ('previous_delegate', 'current_delegate'):
> +            if not value.get(field):
> +                continue
> +
> +            value[field]['url'] = reverse(
> +                'api-user-detail',
> +                request=request,
> +                kwargs={
> +                    'pk': value[field]['id']
> +                })
> +
> +        # set the 'url' field for any embedded 'check' fields
> +        if value.get('check'):
> +            value['check']['url'] = reverse(
> +                'api-check-detail',
> +                request=request,
> +                kwargs={
> +                    'patch_id': value['patch']['id'],
> +                    'check_id': value['check']['id']
> +                })
> +
> +        return value
> +
> +
> +class PayloadSerializer(Serializer):
> +
> +    cover = CoverLetterSerializer(required=False)
> +    patch = PatchSerializer(required=False)
> +    series = SeriesSerializer(required=False)
>      previous_state = StateField()
>      current_state = StateField()
> -    previous_delegate = UserSerializer()
> -    current_delegate = UserSerializer()
> -    created_check = SerializerMethodField()
> -    created_check = CheckSerializer()
> -
> -    _category_map = {
> -        Event.CATEGORY_COVER_CREATED: ['cover'],
> -        Event.CATEGORY_PATCH_CREATED: ['patch'],
> -        Event.CATEGORY_PATCH_COMPLETED: ['patch', 'series'],
> -        Event.CATEGORY_PATCH_STATE_CHANGED: ['patch', 'previous_state',
> -                                             'current_state'],
> -        Event.CATEGORY_PATCH_DELEGATED: ['patch', 'previous_delegate',
> -                                         'current_delegate'],
> -        Event.CATEGORY_CHECK_CREATED: ['patch', 'created_check'],
> -        Event.CATEGORY_SERIES_CREATED: ['series'],
> -        Event.CATEGORY_SERIES_COMPLETED: ['series'],
> -    }
> -
> -    def to_representation(self, instance):
> -        data = super(EventSerializer, self).to_representation(instance)
> -        payload = OrderedDict()
> -        kept_fields = self._category_map[instance.category] + [
> -            'id', 'category', 'project', 'date']
> -
> -        for field in [x for x in data]:
> -            if field not in kept_fields:
> -                del data[field]
> -            elif field in self._category_map[instance.category]:
> -                field_name = 'check' if field == 'created_check' else field
> -                payload[field_name] = data.pop(field)
> -
> -        data['payload'] = payload
> -
> -        return data
> +    previous_delegate = UserSerializer(required=False)
> +    current_delegate = UserSerializer(required=False)
> +    check = CheckSerializer(required=False)
> +
> +
> +class EventSerializer(ModelSerializer):
> +
> +    project = ProjectSerializer(read_only=True)
> +    payload = PayloadField()
>  
>      class Meta:
>          model = Event
> -        fields = ('id', 'category', 'project', 'date', 'patch', 'series',
> -                  'cover', 'previous_state', 'current_state',
> -                  'previous_delegate', 'current_delegate', 'created_check')
> +        fields = ('id', 'category', 'project', 'date', 'payload')
>          read_only_fields = fields
>  
>  
> @@ -110,8 +134,4 @@ class EventList(ListAPIView):
>      ordering = '-date'
>  
>      def get_queryset(self):
> -        return Event.objects.all()\
> -            .prefetch_related('project', 'patch', 'series', 'cover',
> -                              'previous_state', 'current_state',
> -                              'previous_delegate', 'current_delegate',
> -                              'created_check')
> +        return Event.objects.all().select_related('project')
> diff --git a/patchwork/fields.py b/patchwork/fields.py
> index 502558be..0ec5fe95 100644
> --- a/patchwork/fields.py
> +++ b/patchwork/fields.py
> @@ -20,7 +20,9 @@
>  
>  from __future__ import absolute_import
>  
> +from collections import OrderedDict
>  import hashlib
> +import json
>  
>  from django.db import models
>  from django.utils import six
> @@ -44,3 +46,47 @@ class HashField(models.CharField):
>  
>      def db_type(self, connection=None):
>          return 'char(%d)' % self.n_bytes
> +
> +
> +class JSONField(models.Field):
> +    """A basic field for loading/storing JSON.
> +
> +    We use this rather than Django's own JSONField because we don't need to
> +    index this field and said field does not support MySQL yet.
> +    """
> +
> +    def get_internal_type(self):
> +        return 'TextField'
> +
> +    def get_prep_value(self, value):
> +        # This is necessary for Django 1.8.x, which called 'smart_text' from
> +        # 'django.utils.encoding' on all data
> +        return value
> +
> +    def get_db_prep_value(self, value, connection, prepared=False):
> +        value = super(JSONField, self).get_db_prep_value(
> +            value, connection, prepared)
> +        return json.dumps(value)
> +
> +    def from_db_value(self, value, *args, **kwargs):
> +        if value is None:
> +            return value
> +
> +        return json.loads(value, object_pairs_hook=OrderedDict)
> +
> +    def db_type(self, connection):
> +        if connection.vendor == 'postgresql':
> +            if connection.pg_version >= 90400:
> +                return 'jsonb'
> +            return 'text'
> +        if connection.vendor == 'mysql':
> +            if connection.mysql_version >= (5, 7, 8):
> +                return 'json'
> +            return 'longtext'
> +        return 'text'
> +
> +    def to_python(self, value):
> +        if value is None or isinstance(value, OrderedDict):
> +            return value
> +
> +        return json.loads(value, object_pairs_hook=OrderedDict)
> diff --git a/patchwork/migrations/0026_add_event_payload_field.py b/patchwork/migrations/0026_add_event_payload_field.py
> new file mode 100644
> index 00000000..adbdfe2a
> --- /dev/null
> +++ b/patchwork/migrations/0026_add_event_payload_field.py
> @@ -0,0 +1,21 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +import patchwork.fields
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0025_add_regex_validators'),
> +    ]
> +
> +    operations = [
> +        migrations.AddField(
> +            model_name='event',
> +            name='payload',
> +            field=patchwork.fields.JSONField(blank=True, null=True),
> +        ),
> +    ]
> diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> new file mode 100644
> index 00000000..5dca85c5
> --- /dev/null
> +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> @@ -0,0 +1,61 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +from patchwork.api.event import PayloadSerializer
> +from patchwork.models import Event
> +
> +
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
> +def generate_payload(apps, schema_editor):
> +    # NOTE(stephenfin): We could convert this to native SQL in the future
> +    # by using e.g. 'JSON_OBJECT' for MySQL

FYI, I did try doing but couldn't finish it.

  UPDATE `patchwork_event` as event
  	LEFT OUTER JOIN `patchwork_submission` AS submission ON (event.`patch_id` = submission.`id`)
  	SET event.`payload` = JSON_OBJECT('id', submission.`id`, 'url', NULL, 'msgid', submission.`msgid`, 'date', submission.`date`, 'name', submission.`name`, 'mbox', NULL)
  	WHERE event.`category` = 'patch-created';

The above _mostly_ accomplishes what we want for a single event but
it's removing a layer of nesting, e.g.

    "payload": {
      "patch": {
        "id": 1,
        ...
      }
    }

becomes:

    "payload": {
      "id": 1,
      ...
    }

It seems JSON_OBJECT doesn't like being nested either so I'm stuck.
Also, this is MySQL only, which I'm not sure many (any?) people are
running in production.

Looks like we're stuck with the slow but valid Python migration if we
go ahead with this.

Stephen

> +    EventBase = apps.get_model('patchwork', 'Event')
> +
> +    for event in EventBase.objects.all():
> +        category = event.category
> +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> +        # class
> +        if category == Event.CATEGORY_COVER_CREATED:
> +            payload_obj = Payload(cover=event.cover)
> +        elif category == Event.CATEGORY_PATCH_CREATED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_state=event.previous_state,
> +                                  current_state=event.previous_state)
> +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_delegate=event.previous_delegate,
> +                                  current_delegate=event.current_delegate)
> +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_CHECK_CREATED:
> +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> +        elif category == Event.CATEGORY_SERIES_CREATED:
> +            payload_obj = Payload(series=event.series)
> +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> +            payload_obj = Payload(series=event.series)
> +
> +        payload = PayloadSerializer(payload_obj).data
> +        event.payload = payload
> +        event.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0026_add_event_payload_field'),
> +    ]
> +
> +    operations = [
> +        migrations.RunPython(generate_payload, atomic=True),
> +    ]
> diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
> new file mode 100644
> index 00000000..2f7ba7bf
> --- /dev/null
> +++ b/patchwork/migrations/0028_remove_old_event_fields.py
> @@ -0,0 +1,34 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
> +    ]
> +
> +    operations = [
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='created_check',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_state',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_state',
> +        ),
> +    ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index f91b994c..a5992d68 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
>  from patchwork.compat import is_authenticated
>  from patchwork.compat import reverse
>  from patchwork.fields import HashField
> +from patchwork.fields import JSONField
>  from patchwork.hasher import hash_diff
>  
>  if settings.ENABLE_REST_API:
> @@ -943,32 +944,9 @@ class Event(models.Model):
>          on_delete=models.CASCADE,
>          help_text='The cover letter that this event was created for.')
>  
> -    # fields for 'patch-state-changed' events
> +    # additional data
>  
> -    previous_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields for 'patch-delegate-changed' events
> -
> -    previous_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields or 'patch-check-created' events
> -
> -    created_check = models.ForeignKey(
> -        Check, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # TODO(stephenfin): Validate that the correct fields are being set by way
> -    # of a 'clean' method
> +    payload = JSONField(null=True, blank=True)
>  
>      def __repr__(self):
>          return "<Event id='%d' category='%s'" % (self.id, self.category)
> diff --git a/patchwork/signals.py b/patchwork/signals.py
> index b083b51a..c0f4c08d 100644
> --- a/patchwork/signals.py
> +++ b/patchwork/signals.py
> @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
>  from django.db.models.signals import pre_save
>  from django.dispatch import receiver
>  
> +from patchwork.api.event import PayloadSerializer
>  from patchwork.models import Check
>  from patchwork.models import CoverLetter
>  from patchwork.models import Event
> @@ -33,6 +34,14 @@ from patchwork.models import Series
>  from patchwork.models import SeriesPatch
>  
>  
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
>  @receiver(pre_save, sender=Patch)
>  def patch_change_callback(sender, instance, raw, **kwargs):
>      # we only want notification of modified patches
> @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
>  def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(cover):
> +        payload = PayloadSerializer(Payload(
> +            cover=cover))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_COVER_CREATED,
>              project=cover.project,
> +            payload=payload.data,
>              cover=cover)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(patch):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_CREATED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_state=before,
> +            current_state=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_STATE_CHANGED,
>              project=patch.project,
> -            patch=patch,
> -            previous_state=before,
> -            current_state=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  def create_patch_delegated_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_delegate=before,
> +            current_delegate=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_DELEGATED,
>              project=patch.project,
> -            patch=patch,
> -            previous_delegate=before,
> -            current_delegate=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>      """Create patch completed event for patches with series."""
>  
>      def create_event(patch, series):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_COMPLETED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch,
>              series=series)
>  
> @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>  def create_check_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(check):
> +        payload = PayloadSerializer(Payload(
> +            patch=check.patch,
> +            check=check))
> +
>          # TODO(stephenfin): It might make sense to add a 'project' field to
>          # 'check' to prevent lookups here and in the REST API
>          return Event.objects.create(
>              category=Event.CATEGORY_CHECK_CREATED,
>              project=check.patch.project,
> -            patch=check.patch,
> -            created_check=check)
> +            payload=payload.data,
> +            patch=check.patch)
>  
>      # don't trigger for items loaded from fixtures or existing items
>      if raw or not created:
> @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
>  def create_series_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_CREATED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
>      # in that case.
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_COMPLETED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
> index 70d563de..39dff6f1 100644
> --- a/patchwork/tests/test_events.py
> +++ b/patchwork/tests/test_events.py
> @@ -17,32 +17,20 @@
>  # along with Patchwork; if not, write to the Free Software
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>  
> +from collections import OrderedDict
> +
>  from django.test import TestCase
>  
>  from patchwork.models import Event
>  from patchwork.tests import utils
>  
> -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
> -               'current_delegate']
> -
>  
>  def _get_events(**filters):
>      # These are sorted by reverse normally, so reverse it once again
>      return Event.objects.filter(**filters).order_by('date')
>  
>  
> -class _BaseTestCase(TestCase):
> -
> -    def assertEventFields(self, event, parent_type='patch', **fields):
> -        for field_name in [x for x in BASE_FIELDS]:
> -            field = getattr(event, field_name)
> -            if field_name in fields:
> -                self.assertEqual(field, fields[field_name])
> -            else:
> -                self.assertIsNone(field)
> -
> -
> -class PatchCreateTest(_BaseTestCase):
> +class PatchCreateTest(TestCase):
>  
>      def test_patch_created(self):
>          """No series, so patch dependencies implicitly exist."""
> @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, patch.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIsInstance(payload, OrderedDict)
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], patch.id)
>  
>      def test_patch_dependencies_present_series(self):
>          """Patch dependencies already exist."""
> @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events
>          events = _get_events(patch=series_patch.patch)
> +
>          self.assertEqual(events.count(), 2)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, series_patch.patch.project)
>          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
>          self.assertEqual(events[1].project, series_patch.patch.project)
> -        self.assertEventFields(events[0])
> -        self.assertEventFields(events[1])
> +
> +        payload = events[0].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +
> +        payload = events[1].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series_patch.series.id)
>  
>      def test_patch_dependencies_out_of_order(self):
>          series = utils.create_series()
> @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
>              events = _get_events(patch=series_patch.patch)
>              self.assertEqual(events.count(), 1)
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -            self.assertEventFields(events[0])
>  
>          series_patch_1 = utils.create_series_patch(series=series, number=1)
>  
> @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>              self.assertEqual(events[1].category,
>                               Event.CATEGORY_PATCH_COMPLETED)
> -            self.assertEventFields(events[0])
> -            self.assertEventFields(events[1])
>  
>      def test_patch_dependencies_missing(self):
>          series_patch = utils.create_series_patch(number=2)
> @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
>          events = _get_events(patch=series_patch.patch)
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -        self.assertEventFields(events[0])
>  
>  
> -class PatchChangedTest(_BaseTestCase):
> +class PatchChangedTest(TestCase):
>  
>      def test_patch_state_changed(self):
>          patch = utils.create_patch()
> @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_STATE_CHANGED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], previous_state=old_state,
> -                               current_state=new_state)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_state', payload)
> +        self.assertEqual(payload['previous_state'], old_state.slug)
> +        self.assertIn('current_state', payload)
> +        self.assertEqual(payload['current_state'], new_state.slug)
>  
>      def test_patch_delegated(self):
>          patch = utils.create_patch()
> @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], current_delegate=delegate_a)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertIsNone(payload['previous_delegate'])
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
>  
>          delegate_b = utils.create_user()
>  
> @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 3)
>          self.assertEqual(events[2].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[2], previous_delegate=delegate_a,
> -                               current_delegate=delegate_b)
> +
> +        payload = events[2].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
>  
>          # Delegate B -> None
>  
> @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 4)
>          self.assertEqual(events[3].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[3], previous_delegate=delegate_b)
>  
> +        payload = events[3].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertIsNone(payload['current_delegate'])
>  
> -class CheckCreateTest(_BaseTestCase):
> +
> +class CheckCreateTest(TestCase):
>  
>      def test_check_created(self):
>          check = utils.create_check()
> -        events = _get_events(created_check=check)
> -        self.assertEqual(events.count(), 1)
> -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
> -        self.assertEqual(events[0].project, check.patch.project)
> -        self.assertEventFields(events[0])
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        events = _get_events(patch=check.patch)
>  
> +        self.assertEqual(events.count(), 2)
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
> +        self.assertEqual(events[1].project, check.patch.project)
> +
> +        payload = events[1].payload
> +        self.assertIn('check', payload)
> +        self.assertEqual(payload['check']['id'], check.id)
>  
> -class CoverCreateTest(_BaseTestCase):
> +
> +class CoverCreateTest(TestCase):
>  
>      def test_cover_created(self):
>          cover = utils.create_cover()
> @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
>          self.assertEqual(events[0].project, cover.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('cover', payload)
> +        self.assertEqual(payload['cover']['id'], cover.id)
>  
>  
> -class SeriesCreateTest(_BaseTestCase):
> +class SeriesCreateTest(TestCase):
>  
>      def test_series_created(self):
>          series = utils.create_series()
> @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
>          self.assertEqual(events[0].project, series.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series.id)
Daniel Axtens April 10, 2018, 4:23 p.m. UTC | #2
> diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> new file mode 100644
> index 00000000..5dca85c5
> --- /dev/null
> +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> @@ -0,0 +1,61 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +from patchwork.api.event import PayloadSerializer
> +from patchwork.models import Event
> +
> +
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
> +def generate_payload(apps, schema_editor):
> +    # NOTE(stephenfin): We could convert this to native SQL in the future
> +    # by using e.g. 'JSON_OBJECT' for MySQL
> +    EventBase = apps.get_model('patchwork', 'Event')
> +
> +    for event in EventBase.objects.all():
> +        category = event.category
> +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> +        # class
> +        if category == Event.CATEGORY_COVER_CREATED:
> +            payload_obj = Payload(cover=event.cover)
> +        elif category == Event.CATEGORY_PATCH_CREATED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_state=event.previous_state,
> +                                  current_state=event.previous_state)
> +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_delegate=event.previous_delegate,
> +                                  current_delegate=event.current_delegate)
> +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_CHECK_CREATED:
> +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> +        elif category == Event.CATEGORY_SERIES_CREATED:
> +            payload_obj = Payload(series=event.series)
> +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> +            payload_obj = Payload(series=event.series)
> +
> +        payload = PayloadSerializer(payload_obj).data
> +        event.payload = payload
> +        event.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0026_add_event_payload_field'),
> +    ]
> +
> +    operations = [
> +        migrations.RunPython(generate_payload, atomic=True),
> +    ]


I tried to run this migration on my laptop (16GB ram + swap) on a db
with 102k patches. (The postgres sql dump is 781 MB).

It chewed up over 16GB of ram before being OOM killed.

This somewhat derailed my attempts to see if this offers any performance
gain to justify the additional complexity.

BTW - I asked some questions last time around about versioning and
dealing with any future changes. Did you have any thoughts on that? I
think it's also a big issue.

Regards,
Daniel


> diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
> new file mode 100644
> index 00000000..2f7ba7bf
> --- /dev/null
> +++ b/patchwork/migrations/0028_remove_old_event_fields.py
> @@ -0,0 +1,34 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
> +    ]
> +
> +    operations = [
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='created_check',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_state',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_state',
> +        ),
> +    ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index f91b994c..a5992d68 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
>  from patchwork.compat import is_authenticated
>  from patchwork.compat import reverse
>  from patchwork.fields import HashField
> +from patchwork.fields import JSONField
>  from patchwork.hasher import hash_diff
>  
>  if settings.ENABLE_REST_API:
> @@ -943,32 +944,9 @@ class Event(models.Model):
>          on_delete=models.CASCADE,
>          help_text='The cover letter that this event was created for.')
>  
> -    # fields for 'patch-state-changed' events
> +    # additional data
>  
> -    previous_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields for 'patch-delegate-changed' events
> -
> -    previous_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields or 'patch-check-created' events
> -
> -    created_check = models.ForeignKey(
> -        Check, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # TODO(stephenfin): Validate that the correct fields are being set by way
> -    # of a 'clean' method
> +    payload = JSONField(null=True, blank=True)
>  
>      def __repr__(self):
>          return "<Event id='%d' category='%s'" % (self.id, self.category)
> diff --git a/patchwork/signals.py b/patchwork/signals.py
> index b083b51a..c0f4c08d 100644
> --- a/patchwork/signals.py
> +++ b/patchwork/signals.py
> @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
>  from django.db.models.signals import pre_save
>  from django.dispatch import receiver
>  
> +from patchwork.api.event import PayloadSerializer
>  from patchwork.models import Check
>  from patchwork.models import CoverLetter
>  from patchwork.models import Event
> @@ -33,6 +34,14 @@ from patchwork.models import Series
>  from patchwork.models import SeriesPatch
>  
>  
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
>  @receiver(pre_save, sender=Patch)
>  def patch_change_callback(sender, instance, raw, **kwargs):
>      # we only want notification of modified patches
> @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
>  def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(cover):
> +        payload = PayloadSerializer(Payload(
> +            cover=cover))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_COVER_CREATED,
>              project=cover.project,
> +            payload=payload.data,
>              cover=cover)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(patch):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_CREATED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_state=before,
> +            current_state=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_STATE_CHANGED,
>              project=patch.project,
> -            patch=patch,
> -            previous_state=before,
> -            current_state=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  def create_patch_delegated_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_delegate=before,
> +            current_delegate=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_DELEGATED,
>              project=patch.project,
> -            patch=patch,
> -            previous_delegate=before,
> -            current_delegate=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>      """Create patch completed event for patches with series."""
>  
>      def create_event(patch, series):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_COMPLETED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch,
>              series=series)
>  
> @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>  def create_check_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(check):
> +        payload = PayloadSerializer(Payload(
> +            patch=check.patch,
> +            check=check))
> +
>          # TODO(stephenfin): It might make sense to add a 'project' field to
>          # 'check' to prevent lookups here and in the REST API
>          return Event.objects.create(
>              category=Event.CATEGORY_CHECK_CREATED,
>              project=check.patch.project,
> -            patch=check.patch,
> -            created_check=check)
> +            payload=payload.data,
> +            patch=check.patch)
>  
>      # don't trigger for items loaded from fixtures or existing items
>      if raw or not created:
> @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
>  def create_series_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_CREATED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
>      # in that case.
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_COMPLETED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
> index 70d563de..39dff6f1 100644
> --- a/patchwork/tests/test_events.py
> +++ b/patchwork/tests/test_events.py
> @@ -17,32 +17,20 @@
>  # along with Patchwork; if not, write to the Free Software
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>  
> +from collections import OrderedDict
> +
>  from django.test import TestCase
>  
>  from patchwork.models import Event
>  from patchwork.tests import utils
>  
> -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
> -               'current_delegate']
> -
>  
>  def _get_events(**filters):
>      # These are sorted by reverse normally, so reverse it once again
>      return Event.objects.filter(**filters).order_by('date')
>  
>  
> -class _BaseTestCase(TestCase):
> -
> -    def assertEventFields(self, event, parent_type='patch', **fields):
> -        for field_name in [x for x in BASE_FIELDS]:
> -            field = getattr(event, field_name)
> -            if field_name in fields:
> -                self.assertEqual(field, fields[field_name])
> -            else:
> -                self.assertIsNone(field)
> -
> -
> -class PatchCreateTest(_BaseTestCase):
> +class PatchCreateTest(TestCase):
>  
>      def test_patch_created(self):
>          """No series, so patch dependencies implicitly exist."""
> @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, patch.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIsInstance(payload, OrderedDict)
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], patch.id)
>  
>      def test_patch_dependencies_present_series(self):
>          """Patch dependencies already exist."""
> @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events
>          events = _get_events(patch=series_patch.patch)
> +
>          self.assertEqual(events.count(), 2)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, series_patch.patch.project)
>          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
>          self.assertEqual(events[1].project, series_patch.patch.project)
> -        self.assertEventFields(events[0])
> -        self.assertEventFields(events[1])
> +
> +        payload = events[0].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +
> +        payload = events[1].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series_patch.series.id)
>  
>      def test_patch_dependencies_out_of_order(self):
>          series = utils.create_series()
> @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
>              events = _get_events(patch=series_patch.patch)
>              self.assertEqual(events.count(), 1)
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -            self.assertEventFields(events[0])
>  
>          series_patch_1 = utils.create_series_patch(series=series, number=1)
>  
> @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>              self.assertEqual(events[1].category,
>                               Event.CATEGORY_PATCH_COMPLETED)
> -            self.assertEventFields(events[0])
> -            self.assertEventFields(events[1])
>  
>      def test_patch_dependencies_missing(self):
>          series_patch = utils.create_series_patch(number=2)
> @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
>          events = _get_events(patch=series_patch.patch)
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -        self.assertEventFields(events[0])
>  
>  
> -class PatchChangedTest(_BaseTestCase):
> +class PatchChangedTest(TestCase):
>  
>      def test_patch_state_changed(self):
>          patch = utils.create_patch()
> @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_STATE_CHANGED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], previous_state=old_state,
> -                               current_state=new_state)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_state', payload)
> +        self.assertEqual(payload['previous_state'], old_state.slug)
> +        self.assertIn('current_state', payload)
> +        self.assertEqual(payload['current_state'], new_state.slug)
>  
>      def test_patch_delegated(self):
>          patch = utils.create_patch()
> @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], current_delegate=delegate_a)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertIsNone(payload['previous_delegate'])
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
>  
>          delegate_b = utils.create_user()
>  
> @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 3)
>          self.assertEqual(events[2].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[2], previous_delegate=delegate_a,
> -                               current_delegate=delegate_b)
> +
> +        payload = events[2].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
>  
>          # Delegate B -> None
>  
> @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 4)
>          self.assertEqual(events[3].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[3], previous_delegate=delegate_b)
>  
> +        payload = events[3].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertIsNone(payload['current_delegate'])
>  
> -class CheckCreateTest(_BaseTestCase):
> +
> +class CheckCreateTest(TestCase):
>  
>      def test_check_created(self):
>          check = utils.create_check()
> -        events = _get_events(created_check=check)
> -        self.assertEqual(events.count(), 1)
> -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
> -        self.assertEqual(events[0].project, check.patch.project)
> -        self.assertEventFields(events[0])
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        events = _get_events(patch=check.patch)
>  
> +        self.assertEqual(events.count(), 2)
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
> +        self.assertEqual(events[1].project, check.patch.project)
> +
> +        payload = events[1].payload
> +        self.assertIn('check', payload)
> +        self.assertEqual(payload['check']['id'], check.id)
>  
> -class CoverCreateTest(_BaseTestCase):
> +
> +class CoverCreateTest(TestCase):
>  
>      def test_cover_created(self):
>          cover = utils.create_cover()
> @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
>          self.assertEqual(events[0].project, cover.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('cover', payload)
> +        self.assertEqual(payload['cover']['id'], cover.id)
>  
>  
> -class SeriesCreateTest(_BaseTestCase):
> +class SeriesCreateTest(TestCase):
>  
>      def test_series_created(self):
>          series = utils.create_series()
> @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
>          self.assertEqual(events[0].project, series.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series.id)
> -- 
> 2.14.3
>
> _______________________________________________
> Patchwork mailing list
> Patchwork@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/patchwork
Stephen Finucane April 10, 2018, 6:54 p.m. UTC | #3
On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote:
> > diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> > new file mode 100644
> > index 00000000..5dca85c5
> > --- /dev/null
> > +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> > @@ -0,0 +1,61 @@
> > +# -*- coding: utf-8 -*-
> > +from __future__ import unicode_literals
> > +
> > +from django.db import migrations, models
> > +
> > +from patchwork.api.event import PayloadSerializer
> > +from patchwork.models import Event
> > +
> > +
> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> > +class Payload(object):
> > +
> > +    def __init__(self, **kwargs):
> > +        for kwarg in kwargs:
> > +            setattr(self, kwarg, kwargs[kwarg])
> > +
> > +
> > +def generate_payload(apps, schema_editor):
> > +    # NOTE(stephenfin): We could convert this to native SQL in the future
> > +    # by using e.g. 'JSON_OBJECT' for MySQL
> > +    EventBase = apps.get_model('patchwork', 'Event')
> > +
> > +    for event in EventBase.objects.all():
> > +        category = event.category
> > +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> > +        # class
> > +        if category == Event.CATEGORY_COVER_CREATED:
> > +            payload_obj = Payload(cover=event.cover)
> > +        elif category == Event.CATEGORY_PATCH_CREATED:
> > +            payload_obj = Payload(patch=event.patch)
> > +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> > +            payload_obj = Payload(patch=event.patch,
> > +                                  previous_state=event.previous_state,
> > +                                  current_state=event.previous_state)
> > +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> > +            payload_obj = Payload(patch=event.patch,
> > +                                  previous_delegate=event.previous_delegate,
> > +                                  current_delegate=event.current_delegate)
> > +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> > +            payload_obj = Payload(patch=event.patch)
> > +        elif category == Event.CATEGORY_CHECK_CREATED:
> > +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> > +        elif category == Event.CATEGORY_SERIES_CREATED:
> > +            payload_obj = Payload(series=event.series)
> > +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> > +            payload_obj = Payload(series=event.series)
> > +
> > +        payload = PayloadSerializer(payload_obj).data
> > +        event.payload = payload
> > +        event.save()
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [
> > +        ('patchwork', '0026_add_event_payload_field'),
> > +    ]
> > +
> > +    operations = [
> > +        migrations.RunPython(generate_payload, atomic=True),
> > +    ]
> 
> 
> I tried to run this migration on my laptop (16GB ram + swap) on a db
> with 102k patches. (The postgres sql dump is 781 MB).
> 
> It chewed up over 16GB of ram before being OOM killed.
> 
> This somewhat derailed my attempts to see if this offers any performance
> gain to justify the additional complexity.

Darn, I'd tested this with the Patchwork archives and, while slow, it
did work. Guess it must be loading a lot of stuff into memory. I'll try
this again later in the week with a larger archive and see if I can
optimize anything.

> BTW - I asked some questions last time around about versioning and
> dealing with any future changes. Did you have any thoughts on that? I
> think it's also a big issue.

Right, sorry. I'll address those now.

Stephen

> Regards,
> Daniel
> 
> 
> > diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
> > new file mode 100644
> > index 00000000..2f7ba7bf
> > --- /dev/null
> > +++ b/patchwork/migrations/0028_remove_old_event_fields.py
> > @@ -0,0 +1,34 @@
> > +# -*- coding: utf-8 -*-
> > +from __future__ import unicode_literals
> > +
> > +from django.db import migrations
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [
> > +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
> > +    ]
> > +
> > +    operations = [
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='created_check',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='current_delegate',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='current_state',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='previous_delegate',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='previous_state',
> > +        ),
> > +    ]
> > diff --git a/patchwork/models.py b/patchwork/models.py
> > index f91b994c..a5992d68 100644
> > --- a/patchwork/models.py
> > +++ b/patchwork/models.py
> > @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
> >  from patchwork.compat import is_authenticated
> >  from patchwork.compat import reverse
> >  from patchwork.fields import HashField
> > +from patchwork.fields import JSONField
> >  from patchwork.hasher import hash_diff
> >  
> >  if settings.ENABLE_REST_API:
> > @@ -943,32 +944,9 @@ class Event(models.Model):
> >          on_delete=models.CASCADE,
> >          help_text='The cover letter that this event was created for.')
> >  
> > -    # fields for 'patch-state-changed' events
> > +    # additional data
> >  
> > -    previous_state = models.ForeignKey(
> > -        State, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -    current_state = models.ForeignKey(
> > -        State, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # fields for 'patch-delegate-changed' events
> > -
> > -    previous_delegate = models.ForeignKey(
> > -        User, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -    current_delegate = models.ForeignKey(
> > -        User, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # fields or 'patch-check-created' events
> > -
> > -    created_check = models.ForeignKey(
> > -        Check, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # TODO(stephenfin): Validate that the correct fields are being set by way
> > -    # of a 'clean' method
> > +    payload = JSONField(null=True, blank=True)
> >  
> >      def __repr__(self):
> >          return "<Event id='%d' category='%s'" % (self.id, self.category)
> > diff --git a/patchwork/signals.py b/patchwork/signals.py
> > index b083b51a..c0f4c08d 100644
> > --- a/patchwork/signals.py
> > +++ b/patchwork/signals.py
> > @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
> >  from django.db.models.signals import pre_save
> >  from django.dispatch import receiver
> >  
> > +from patchwork.api.event import PayloadSerializer
> >  from patchwork.models import Check
> >  from patchwork.models import CoverLetter
> >  from patchwork.models import Event
> > @@ -33,6 +34,14 @@ from patchwork.models import Series
> >  from patchwork.models import SeriesPatch
> >  
> >  
> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> > +class Payload(object):
> > +
> > +    def __init__(self, **kwargs):
> > +        for kwarg in kwargs:
> > +            setattr(self, kwarg, kwargs[kwarg])
> > +
> > +
> >  @receiver(pre_save, sender=Patch)
> >  def patch_change_callback(sender, instance, raw, **kwargs):
> >      # we only want notification of modified patches
> > @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
> >  def create_cover_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(cover):
> > +        payload = PayloadSerializer(Payload(
> > +            cover=cover))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_COVER_CREATED,
> >              project=cover.project,
> > +            payload=payload.data,
> >              cover=cover)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> > @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
> >  def create_patch_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(patch):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_CREATED,
> >              project=patch.project,
> > +            payload=payload.data,
> >              patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> > @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
> >  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
> >  
> >      def create_event(patch, before, after):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            previous_state=before,
> > +            current_state=after))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_STATE_CHANGED,
> >              project=patch.project,
> > -            patch=patch,
> > -            previous_state=before,
> > -            current_state=after)
> > +            payload=payload.data,
> > +            patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> >      if raw or not instance.pk:
> > @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
> >  def create_patch_delegated_event(sender, instance, raw, **kwargs):
> >  
> >      def create_event(patch, before, after):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            previous_delegate=before,
> > +            current_delegate=after))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_DELEGATED,
> >              project=patch.project,
> > -            patch=patch,
> > -            previous_delegate=before,
> > -            current_delegate=after)
> > +            payload=payload.data,
> > +            patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> >      if raw or not instance.pk:
> > @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
> >      """Create patch completed event for patches with series."""
> >  
> >      def create_event(patch, series):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_COMPLETED,
> >              project=patch.project,
> > +            payload=payload.data,
> >              patch=patch,
> >              series=series)
> >  
> > @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
> >  def create_check_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(check):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=check.patch,
> > +            check=check))
> > +
> >          # TODO(stephenfin): It might make sense to add a 'project' field to
> >          # 'check' to prevent lookups here and in the REST API
> >          return Event.objects.create(
> >              category=Event.CATEGORY_CHECK_CREATED,
> >              project=check.patch.project,
> > -            patch=check.patch,
> > -            created_check=check)
> > +            payload=payload.data,
> > +            patch=check.patch)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> >      if raw or not created:
> > @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
> >  def create_series_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(series):
> > +        payload = PayloadSerializer(Payload(
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_SERIES_CREATED,
> >              project=series.project,
> > +            payload=payload.data,
> >              series=series)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> > @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
> >      # in that case.
> >  
> >      def create_event(series):
> > +        payload = PayloadSerializer(Payload(
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_SERIES_COMPLETED,
> >              project=series.project,
> > +            payload=payload.data,
> >              series=series)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> > diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
> > index 70d563de..39dff6f1 100644
> > --- a/patchwork/tests/test_events.py
> > +++ b/patchwork/tests/test_events.py
> > @@ -17,32 +17,20 @@
> >  # along with Patchwork; if not, write to the Free Software
> >  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> >  
> > +from collections import OrderedDict
> > +
> >  from django.test import TestCase
> >  
> >  from patchwork.models import Event
> >  from patchwork.tests import utils
> >  
> > -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
> > -               'current_delegate']
> > -
> >  
> >  def _get_events(**filters):
> >      # These are sorted by reverse normally, so reverse it once again
> >      return Event.objects.filter(**filters).order_by('date')
> >  
> >  
> > -class _BaseTestCase(TestCase):
> > -
> > -    def assertEventFields(self, event, parent_type='patch', **fields):
> > -        for field_name in [x for x in BASE_FIELDS]:
> > -            field = getattr(event, field_name)
> > -            if field_name in fields:
> > -                self.assertEqual(field, fields[field_name])
> > -            else:
> > -                self.assertIsNone(field)
> > -
> > -
> > -class PatchCreateTest(_BaseTestCase):
> > +class PatchCreateTest(TestCase):
> >  
> >      def test_patch_created(self):
> >          """No series, so patch dependencies implicitly exist."""
> > @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
> >          # This should raise both the CATEGORY_PATCH_CREATED and
> >          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >          self.assertEqual(events[0].project, patch.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIsInstance(payload, OrderedDict)
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], patch.id)
> >  
> >      def test_patch_dependencies_present_series(self):
> >          """Patch dependencies already exist."""
> > @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
> >          # This should raise both the CATEGORY_PATCH_CREATED and
> >          # CATEGORY_PATCH_COMPLETED events
> >          events = _get_events(patch=series_patch.patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >          self.assertEqual(events[0].project, series_patch.patch.project)
> >          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
> >          self.assertEqual(events[1].project, series_patch.patch.project)
> > -        self.assertEventFields(events[0])
> > -        self.assertEventFields(events[1])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> > +        self.assertIn('series', payload)
> > +        self.assertEqual(payload['series']['id'], series_patch.series.id)
> >  
> >      def test_patch_dependencies_out_of_order(self):
> >          series = utils.create_series()
> > @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
> >              events = _get_events(patch=series_patch.patch)
> >              self.assertEqual(events.count(), 1)
> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> > -            self.assertEventFields(events[0])
> >  
> >          series_patch_1 = utils.create_series_patch(series=series, number=1)
> >  
> > @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >              self.assertEqual(events[1].category,
> >                               Event.CATEGORY_PATCH_COMPLETED)
> > -            self.assertEventFields(events[0])
> > -            self.assertEventFields(events[1])
> >  
> >      def test_patch_dependencies_missing(self):
> >          series_patch = utils.create_series_patch(number=2)
> > @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
> >          events = _get_events(patch=series_patch.patch)
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> > -        self.assertEventFields(events[0])
> >  
> >  
> > -class PatchChangedTest(_BaseTestCase):
> > +class PatchChangedTest(TestCase):
> >  
> >      def test_patch_state_changed(self):
> >          patch = utils.create_patch()
> > @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          # we don't care about the CATEGORY_PATCH_CREATED event here
> >          self.assertEqual(events[1].category,
> >                           Event.CATEGORY_PATCH_STATE_CHANGED)
> >          self.assertEqual(events[1].project, patch.project)
> > -        self.assertEventFields(events[1], previous_state=old_state,
> > -                               current_state=new_state)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('previous_state', payload)
> > +        self.assertEqual(payload['previous_state'], old_state.slug)
> > +        self.assertIn('current_state', payload)
> > +        self.assertEqual(payload['current_state'], new_state.slug)
> >  
> >      def test_patch_delegated(self):
> >          patch = utils.create_patch()
> > @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          # we don't care about the CATEGORY_PATCH_CREATED event here
> >          self.assertEqual(events[1].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> >          self.assertEqual(events[1].project, patch.project)
> > -        self.assertEventFields(events[1], current_delegate=delegate_a)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertIsNone(payload['previous_delegate'])
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
> >  
> >          delegate_b = utils.create_user()
> >  
> > @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 3)
> >          self.assertEqual(events[2].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> > -        self.assertEventFields(events[2], previous_delegate=delegate_a,
> > -                               current_delegate=delegate_b)
> > +
> > +        payload = events[2].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
> >  
> >          # Delegate B -> None
> >  
> > @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 4)
> >          self.assertEqual(events[3].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> > -        self.assertEventFields(events[3], previous_delegate=delegate_b)
> >  
> > +        payload = events[3].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertIsNone(payload['current_delegate'])
> >  
> > -class CheckCreateTest(_BaseTestCase):
> > +
> > +class CheckCreateTest(TestCase):
> >  
> >      def test_check_created(self):
> >          check = utils.create_check()
> > -        events = _get_events(created_check=check)
> > -        self.assertEqual(events.count(), 1)
> > -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
> > -        self.assertEqual(events[0].project, check.patch.project)
> > -        self.assertEventFields(events[0])
> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
> > +        events = _get_events(patch=check.patch)
> >  
> > +        self.assertEqual(events.count(), 2)
> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
> > +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
> > +        self.assertEqual(events[1].project, check.patch.project)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('check', payload)
> > +        self.assertEqual(payload['check']['id'], check.id)
> >  
> > -class CoverCreateTest(_BaseTestCase):
> > +
> > +class CoverCreateTest(TestCase):
> >  
> >      def test_cover_created(self):
> >          cover = utils.create_cover()
> > @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
> >          self.assertEqual(events[0].project, cover.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('cover', payload)
> > +        self.assertEqual(payload['cover']['id'], cover.id)
> >  
> >  
> > -class SeriesCreateTest(_BaseTestCase):
> > +class SeriesCreateTest(TestCase):
> >  
> >      def test_series_created(self):
> >          series = utils.create_series()
> > @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
> >          self.assertEqual(events[0].project, series.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('series', payload)
> > +        self.assertEqual(payload['series']['id'], series.id)
> > -- 
> > 2.14.3
> > 
> > _______________________________________________
> > Patchwork mailing list
> > Patchwork@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/patchwork
Daniel Axtens April 11, 2018, 4:19 p.m. UTC | #4
Stephen Finucane <stephen@that.guru> writes:

> On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote:
>> > diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
>> > new file mode 100644
>> > index 00000000..5dca85c5
>> > --- /dev/null
>> > +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
>> > @@ -0,0 +1,61 @@
>> > +# -*- coding: utf-8 -*-
>> > +from __future__ import unicode_literals
>> > +
>> > +from django.db import migrations, models
>> > +
>> > +from patchwork.api.event import PayloadSerializer
>> > +from patchwork.models import Event
>> > +
>> > +
>> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
>> > +class Payload(object):
>> > +
>> > +    def __init__(self, **kwargs):
>> > +        for kwarg in kwargs:
>> > +            setattr(self, kwarg, kwargs[kwarg])
>> > +
>> > +
>> > +def generate_payload(apps, schema_editor):
>> > +    # NOTE(stephenfin): We could convert this to native SQL in the future
>> > +    # by using e.g. 'JSON_OBJECT' for MySQL
>> > +    EventBase = apps.get_model('patchwork', 'Event')
>> > +
>> > +    for event in EventBase.objects.all():
>> > +        category = event.category
>> > +        # TODO(stephenfin): This logic should be moved into the EventSerializer
>> > +        # class
>> > +        if category == Event.CATEGORY_COVER_CREATED:
>> > +            payload_obj = Payload(cover=event.cover)
>> > +        elif category == Event.CATEGORY_PATCH_CREATED:
>> > +            payload_obj = Payload(patch=event.patch)
>> > +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
>> > +            payload_obj = Payload(patch=event.patch,
>> > +                                  previous_state=event.previous_state,
>> > +                                  current_state=event.previous_state)
>> > +        elif category == Event.CATEGORY_PATCH_DELEGATED:
>> > +            payload_obj = Payload(patch=event.patch,
>> > +                                  previous_delegate=event.previous_delegate,
>> > +                                  current_delegate=event.current_delegate)
>> > +        elif category == Event.CATEGORY_PATCH_COMPLETED:
>> > +            payload_obj = Payload(patch=event.patch)
>> > +        elif category == Event.CATEGORY_CHECK_CREATED:
>> > +            payload_obj = Payload(patch=event.patch, check=event.created_check)
>> > +        elif category == Event.CATEGORY_SERIES_CREATED:
>> > +            payload_obj = Payload(series=event.series)
>> > +        elif category == Event.CATEGORY_SERIES_COMPLETED:
>> > +            payload_obj = Payload(series=event.series)
>> > +
>> > +        payload = PayloadSerializer(payload_obj).data

Converting this to json.dumps(PayloadSerializer(payload_obj).data)
WFM. It now 'only' uses about a third of my memory for 318k events.

The results seem to render although they are more nested than I remember:

{
        "id": 318637,
        "category": "patch-completed",
        "project": {
            "id": 2,
            "url": "http://localhost:8000/api/projects/2/",
            "name": "Kernel Team",
            "link_name": "kernel-team",
            "list_id": "kernel-team.lists.ubuntu.com",
            "list_email": "kernel-team@lists.ubuntu.com",
            "web_url": "",
            "scm_url": "",
            "webscm_url": ""
        },
        "date": "2018-04-10T15:24:19.650185",
        "payload": {
            "patch": {
                "id": 105448,
                "url": "http://localhost:8000/api/patches/105448/",
                "msgid": "<20170828081014.24028-1-po-hsu.lin@canonical.com>",
                "date": "2017-08-28T08:10:14",
                "name": "[CVE-2016-10200,SRU,Trusty] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{, 6}_bind()",
                "mbox": "http://localhost:8000/patch/105448/mbox/"
            }
        }
    },

ISTR they looked a bit different previously, although I can't remember
if the details (here things like mbox) were previous at the top level or
in some object.

Timings to come.

Regards,
Daniel

>> > +        event.payload = payload
>> > +        event.save()
>> > +
>> > +
>> > +class Migration(migrations.Migration):
>> > +
>> > +    dependencies = [
>> > +        ('patchwork', '0026_add_event_payload_field'),
>> > +    ]
>> > +
>> > +    operations = [
>> > +        migrations.RunPython(generate_payload, atomic=True),
>> > +    ]
>> 
>> 
>> I tried to run this migration on my laptop (16GB ram + swap) on a db
>> with 102k patches. (The postgres sql dump is 781 MB).
>> 
>> It chewed up over 16GB of ram before being OOM killed.
>> 
>> This somewhat derailed my attempts to see if this offers any performance
>> gain to justify the additional complexity.
>
> Darn, I'd tested this with the Patchwork archives and, while slow, it
> did work. Guess it must be loading a lot of stuff into memory. I'll try
> this again later in the week with a larger archive and see if I can
> optimize anything.
>
>> BTW - I asked some questions last time around about versioning and
>> dealing with any future changes. Did you have any thoughts on that? I
>> think it's also a big issue.
>
> Right, sorry. I'll address those now.
>
> Stephen
>
>> Regards,
>> Daniel
>> 
>> 
>> > diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
>> > new file mode 100644
>> > index 00000000..2f7ba7bf
>> > --- /dev/null
>> > +++ b/patchwork/migrations/0028_remove_old_event_fields.py
>> > @@ -0,0 +1,34 @@
>> > +# -*- coding: utf-8 -*-
>> > +from __future__ import unicode_literals
>> > +
>> > +from django.db import migrations
>> > +
>> > +
>> > +class Migration(migrations.Migration):
>> > +
>> > +    dependencies = [
>> > +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
>> > +    ]
>> > +
>> > +    operations = [
>> > +        migrations.RemoveField(
>> > +            model_name='event',
>> > +            name='created_check',
>> > +        ),
>> > +        migrations.RemoveField(
>> > +            model_name='event',
>> > +            name='current_delegate',
>> > +        ),
>> > +        migrations.RemoveField(
>> > +            model_name='event',
>> > +            name='current_state',
>> > +        ),
>> > +        migrations.RemoveField(
>> > +            model_name='event',
>> > +            name='previous_delegate',
>> > +        ),
>> > +        migrations.RemoveField(
>> > +            model_name='event',
>> > +            name='previous_state',
>> > +        ),
>> > +    ]
>> > diff --git a/patchwork/models.py b/patchwork/models.py
>> > index f91b994c..a5992d68 100644
>> > --- a/patchwork/models.py
>> > +++ b/patchwork/models.py
>> > @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
>> >  from patchwork.compat import is_authenticated
>> >  from patchwork.compat import reverse
>> >  from patchwork.fields import HashField
>> > +from patchwork.fields import JSONField
>> >  from patchwork.hasher import hash_diff
>> >  
>> >  if settings.ENABLE_REST_API:
>> > @@ -943,32 +944,9 @@ class Event(models.Model):
>> >          on_delete=models.CASCADE,
>> >          help_text='The cover letter that this event was created for.')
>> >  
>> > -    # fields for 'patch-state-changed' events
>> > +    # additional data
>> >  
>> > -    previous_state = models.ForeignKey(
>> > -        State, related_name='+', null=True, blank=True,
>> > -        on_delete=models.CASCADE)
>> > -    current_state = models.ForeignKey(
>> > -        State, related_name='+', null=True, blank=True,
>> > -        on_delete=models.CASCADE)
>> > -
>> > -    # fields for 'patch-delegate-changed' events
>> > -
>> > -    previous_delegate = models.ForeignKey(
>> > -        User, related_name='+', null=True, blank=True,
>> > -        on_delete=models.CASCADE)
>> > -    current_delegate = models.ForeignKey(
>> > -        User, related_name='+', null=True, blank=True,
>> > -        on_delete=models.CASCADE)
>> > -
>> > -    # fields or 'patch-check-created' events
>> > -
>> > -    created_check = models.ForeignKey(
>> > -        Check, related_name='+', null=True, blank=True,
>> > -        on_delete=models.CASCADE)
>> > -
>> > -    # TODO(stephenfin): Validate that the correct fields are being set by way
>> > -    # of a 'clean' method
>> > +    payload = JSONField(null=True, blank=True)
>> >  
>> >      def __repr__(self):
>> >          return "<Event id='%d' category='%s'" % (self.id, self.category)
>> > diff --git a/patchwork/signals.py b/patchwork/signals.py
>> > index b083b51a..c0f4c08d 100644
>> > --- a/patchwork/signals.py
>> > +++ b/patchwork/signals.py
>> > @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
>> >  from django.db.models.signals import pre_save
>> >  from django.dispatch import receiver
>> >  
>> > +from patchwork.api.event import PayloadSerializer
>> >  from patchwork.models import Check
>> >  from patchwork.models import CoverLetter
>> >  from patchwork.models import Event
>> > @@ -33,6 +34,14 @@ from patchwork.models import Series
>> >  from patchwork.models import SeriesPatch
>> >  
>> >  
>> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
>> > +class Payload(object):
>> > +
>> > +    def __init__(self, **kwargs):
>> > +        for kwarg in kwargs:
>> > +            setattr(self, kwarg, kwargs[kwarg])
>> > +
>> > +
>> >  @receiver(pre_save, sender=Patch)
>> >  def patch_change_callback(sender, instance, raw, **kwargs):
>> >      # we only want notification of modified patches
>> > @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
>> >  def create_cover_created_event(sender, instance, created, raw, **kwargs):
>> >  
>> >      def create_event(cover):
>> > +        payload = PayloadSerializer(Payload(
>> > +            cover=cover))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_COVER_CREATED,
>> >              project=cover.project,
>> > +            payload=payload.data,
>> >              cover=cover)
>> >  
>> >      # don't trigger for items loaded from fixtures or new items
>> > @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
>> >  def create_patch_created_event(sender, instance, created, raw, **kwargs):
>> >  
>> >      def create_event(patch):
>> > +        payload = PayloadSerializer(Payload(
>> > +            patch=patch))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_PATCH_CREATED,
>> >              project=patch.project,
>> > +            payload=payload.data,
>> >              patch=patch)
>> >  
>> >      # don't trigger for items loaded from fixtures or new items
>> > @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
>> >  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>> >  
>> >      def create_event(patch, before, after):
>> > +        payload = PayloadSerializer(Payload(
>> > +            patch=patch,
>> > +            previous_state=before,
>> > +            current_state=after))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_PATCH_STATE_CHANGED,
>> >              project=patch.project,
>> > -            patch=patch,
>> > -            previous_state=before,
>> > -            current_state=after)
>> > +            payload=payload.data,
>> > +            patch=patch)
>> >  
>> >      # don't trigger for items loaded from fixtures or new items
>> >      if raw or not instance.pk:
>> > @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>> >  def create_patch_delegated_event(sender, instance, raw, **kwargs):
>> >  
>> >      def create_event(patch, before, after):
>> > +        payload = PayloadSerializer(Payload(
>> > +            patch=patch,
>> > +            previous_delegate=before,
>> > +            current_delegate=after))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_PATCH_DELEGATED,
>> >              project=patch.project,
>> > -            patch=patch,
>> > -            previous_delegate=before,
>> > -            current_delegate=after)
>> > +            payload=payload.data,
>> > +            patch=patch)
>> >  
>> >      # don't trigger for items loaded from fixtures or new items
>> >      if raw or not instance.pk:
>> > @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>> >      """Create patch completed event for patches with series."""
>> >  
>> >      def create_event(patch, series):
>> > +        payload = PayloadSerializer(Payload(
>> > +            patch=patch,
>> > +            series=series))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_PATCH_COMPLETED,
>> >              project=patch.project,
>> > +            payload=payload.data,
>> >              patch=patch,
>> >              series=series)
>> >  
>> > @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>> >  def create_check_created_event(sender, instance, created, raw, **kwargs):
>> >  
>> >      def create_event(check):
>> > +        payload = PayloadSerializer(Payload(
>> > +            patch=check.patch,
>> > +            check=check))
>> > +
>> >          # TODO(stephenfin): It might make sense to add a 'project' field to
>> >          # 'check' to prevent lookups here and in the REST API
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_CHECK_CREATED,
>> >              project=check.patch.project,
>> > -            patch=check.patch,
>> > -            created_check=check)
>> > +            payload=payload.data,
>> > +            patch=check.patch)
>> >  
>> >      # don't trigger for items loaded from fixtures or existing items
>> >      if raw or not created:
>> > @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
>> >  def create_series_created_event(sender, instance, created, raw, **kwargs):
>> >  
>> >      def create_event(series):
>> > +        payload = PayloadSerializer(Payload(
>> > +            series=series))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_SERIES_CREATED,
>> >              project=series.project,
>> > +            payload=payload.data,
>> >              series=series)
>> >  
>> >      # don't trigger for items loaded from fixtures or existing items
>> > @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
>> >      # in that case.
>> >  
>> >      def create_event(series):
>> > +        payload = PayloadSerializer(Payload(
>> > +            series=series))
>> > +
>> >          return Event.objects.create(
>> >              category=Event.CATEGORY_SERIES_COMPLETED,
>> >              project=series.project,
>> > +            payload=payload.data,
>> >              series=series)
>> >  
>> >      # don't trigger for items loaded from fixtures or existing items
>> > diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
>> > index 70d563de..39dff6f1 100644
>> > --- a/patchwork/tests/test_events.py
>> > +++ b/patchwork/tests/test_events.py
>> > @@ -17,32 +17,20 @@
>> >  # along with Patchwork; if not, write to the Free Software
>> >  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>> >  
>> > +from collections import OrderedDict
>> > +
>> >  from django.test import TestCase
>> >  
>> >  from patchwork.models import Event
>> >  from patchwork.tests import utils
>> >  
>> > -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
>> > -               'current_delegate']
>> > -
>> >  
>> >  def _get_events(**filters):
>> >      # These are sorted by reverse normally, so reverse it once again
>> >      return Event.objects.filter(**filters).order_by('date')
>> >  
>> >  
>> > -class _BaseTestCase(TestCase):
>> > -
>> > -    def assertEventFields(self, event, parent_type='patch', **fields):
>> > -        for field_name in [x for x in BASE_FIELDS]:
>> > -            field = getattr(event, field_name)
>> > -            if field_name in fields:
>> > -                self.assertEqual(field, fields[field_name])
>> > -            else:
>> > -                self.assertIsNone(field)
>> > -
>> > -
>> > -class PatchCreateTest(_BaseTestCase):
>> > +class PatchCreateTest(TestCase):
>> >  
>> >      def test_patch_created(self):
>> >          """No series, so patch dependencies implicitly exist."""
>> > @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
>> >          # This should raise both the CATEGORY_PATCH_CREATED and
>> >          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
>> >          events = _get_events(patch=patch)
>> > +
>> >          self.assertEqual(events.count(), 1)
>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>> >          self.assertEqual(events[0].project, patch.project)
>> > -        self.assertEventFields(events[0])
>> > +
>> > +        payload = events[0].payload
>> > +        self.assertIsInstance(payload, OrderedDict)
>> > +        self.assertIn('patch', payload)
>> > +        self.assertEqual(payload['patch']['id'], patch.id)
>> >  
>> >      def test_patch_dependencies_present_series(self):
>> >          """Patch dependencies already exist."""
>> > @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
>> >          # This should raise both the CATEGORY_PATCH_CREATED and
>> >          # CATEGORY_PATCH_COMPLETED events
>> >          events = _get_events(patch=series_patch.patch)
>> > +
>> >          self.assertEqual(events.count(), 2)
>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>> >          self.assertEqual(events[0].project, series_patch.patch.project)
>> >          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
>> >          self.assertEqual(events[1].project, series_patch.patch.project)
>> > -        self.assertEventFields(events[0])
>> > -        self.assertEventFields(events[1])
>> > +
>> > +        payload = events[0].payload
>> > +        self.assertIn('patch', payload)
>> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
>> > +
>> > +        payload = events[1].payload
>> > +        self.assertIn('patch', payload)
>> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
>> > +        self.assertIn('series', payload)
>> > +        self.assertEqual(payload['series']['id'], series_patch.series.id)
>> >  
>> >      def test_patch_dependencies_out_of_order(self):
>> >          series = utils.create_series()
>> > @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
>> >              events = _get_events(patch=series_patch.patch)
>> >              self.assertEqual(events.count(), 1)
>> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>> > -            self.assertEventFields(events[0])
>> >  
>> >          series_patch_1 = utils.create_series_patch(series=series, number=1)
>> >  
>> > @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
>> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>> >              self.assertEqual(events[1].category,
>> >                               Event.CATEGORY_PATCH_COMPLETED)
>> > -            self.assertEventFields(events[0])
>> > -            self.assertEventFields(events[1])
>> >  
>> >      def test_patch_dependencies_missing(self):
>> >          series_patch = utils.create_series_patch(number=2)
>> > @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
>> >          events = _get_events(patch=series_patch.patch)
>> >          self.assertEqual(events.count(), 1)
>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>> > -        self.assertEventFields(events[0])
>> >  
>> >  
>> > -class PatchChangedTest(_BaseTestCase):
>> > +class PatchChangedTest(TestCase):
>> >  
>> >      def test_patch_state_changed(self):
>> >          patch = utils.create_patch()
>> > @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
>> >          patch.save()
>> >  
>> >          events = _get_events(patch=patch)
>> > +
>> >          self.assertEqual(events.count(), 2)
>> >          # we don't care about the CATEGORY_PATCH_CREATED event here
>> >          self.assertEqual(events[1].category,
>> >                           Event.CATEGORY_PATCH_STATE_CHANGED)
>> >          self.assertEqual(events[1].project, patch.project)
>> > -        self.assertEventFields(events[1], previous_state=old_state,
>> > -                               current_state=new_state)
>> > +
>> > +        payload = events[1].payload
>> > +        self.assertIn('previous_state', payload)
>> > +        self.assertEqual(payload['previous_state'], old_state.slug)
>> > +        self.assertIn('current_state', payload)
>> > +        self.assertEqual(payload['current_state'], new_state.slug)
>> >  
>> >      def test_patch_delegated(self):
>> >          patch = utils.create_patch()
>> > @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
>> >          patch.save()
>> >  
>> >          events = _get_events(patch=patch)
>> > +
>> >          self.assertEqual(events.count(), 2)
>> >          # we don't care about the CATEGORY_PATCH_CREATED event here
>> >          self.assertEqual(events[1].category,
>> >                           Event.CATEGORY_PATCH_DELEGATED)
>> >          self.assertEqual(events[1].project, patch.project)
>> > -        self.assertEventFields(events[1], current_delegate=delegate_a)
>> > +
>> > +        payload = events[1].payload
>> > +        self.assertIn('previous_delegate', payload)
>> > +        self.assertIsNone(payload['previous_delegate'])
>> > +        self.assertIn('current_delegate', payload)
>> > +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
>> >  
>> >          delegate_b = utils.create_user()
>> >  
>> > @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
>> >          patch.save()
>> >  
>> >          events = _get_events(patch=patch)
>> > +
>> >          self.assertEqual(events.count(), 3)
>> >          self.assertEqual(events[2].category,
>> >                           Event.CATEGORY_PATCH_DELEGATED)
>> > -        self.assertEventFields(events[2], previous_delegate=delegate_a,
>> > -                               current_delegate=delegate_b)
>> > +
>> > +        payload = events[2].payload
>> > +        self.assertIn('previous_delegate', payload)
>> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
>> > +        self.assertIn('current_delegate', payload)
>> > +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
>> >  
>> >          # Delegate B -> None
>> >  
>> > @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
>> >          patch.save()
>> >  
>> >          events = _get_events(patch=patch)
>> > +
>> >          self.assertEqual(events.count(), 4)
>> >          self.assertEqual(events[3].category,
>> >                           Event.CATEGORY_PATCH_DELEGATED)
>> > -        self.assertEventFields(events[3], previous_delegate=delegate_b)
>> >  
>> > +        payload = events[3].payload
>> > +        self.assertIn('previous_delegate', payload)
>> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
>> > +        self.assertIn('current_delegate', payload)
>> > +        self.assertIsNone(payload['current_delegate'])
>> >  
>> > -class CheckCreateTest(_BaseTestCase):
>> > +
>> > +class CheckCreateTest(TestCase):
>> >  
>> >      def test_check_created(self):
>> >          check = utils.create_check()
>> > -        events = _get_events(created_check=check)
>> > -        self.assertEqual(events.count(), 1)
>> > -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
>> > -        self.assertEqual(events[0].project, check.patch.project)
>> > -        self.assertEventFields(events[0])
>> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
>> > +        events = _get_events(patch=check.patch)
>> >  
>> > +        self.assertEqual(events.count(), 2)
>> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
>> > +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
>> > +        self.assertEqual(events[1].project, check.patch.project)
>> > +
>> > +        payload = events[1].payload
>> > +        self.assertIn('check', payload)
>> > +        self.assertEqual(payload['check']['id'], check.id)
>> >  
>> > -class CoverCreateTest(_BaseTestCase):
>> > +
>> > +class CoverCreateTest(TestCase):
>> >  
>> >      def test_cover_created(self):
>> >          cover = utils.create_cover()
>> > @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
>> >          self.assertEqual(events.count(), 1)
>> >          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
>> >          self.assertEqual(events[0].project, cover.project)
>> > -        self.assertEventFields(events[0])
>> > +
>> > +        payload = events[0].payload
>> > +        self.assertIn('cover', payload)
>> > +        self.assertEqual(payload['cover']['id'], cover.id)
>> >  
>> >  
>> > -class SeriesCreateTest(_BaseTestCase):
>> > +class SeriesCreateTest(TestCase):
>> >  
>> >      def test_series_created(self):
>> >          series = utils.create_series()
>> > @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
>> >          self.assertEqual(events.count(), 1)
>> >          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
>> >          self.assertEqual(events[0].project, series.project)
>> > -        self.assertEventFields(events[0])
>> > +
>> > +        payload = events[0].payload
>> > +        self.assertIn('series', payload)
>> > +        self.assertEqual(payload['series']['id'], series.id)
>> > -- 
>> > 2.14.3
>> > 
>> > _______________________________________________
>> > Patchwork mailing list
>> > Patchwork@lists.ozlabs.org
>> > https://lists.ozlabs.org/listinfo/patchwork
Daniel Axtens April 12, 2018, 1:50 a.m. UTC | #5
Daniel Axtens <dja@axtens.net> writes:

> Stephen Finucane <stephen@that.guru> writes:
>
>> On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote:
>>> > diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
>>> > new file mode 100644
>>> > index 00000000..5dca85c5
>>> > --- /dev/null
>>> > +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
>>> > @@ -0,0 +1,61 @@
>>> > +# -*- coding: utf-8 -*-
>>> > +from __future__ import unicode_literals
>>> > +
>>> > +from django.db import migrations, models
>>> > +
>>> > +from patchwork.api.event import PayloadSerializer
>>> > +from patchwork.models import Event
>>> > +
>>> > +
>>> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
>>> > +class Payload(object):
>>> > +
>>> > +    def __init__(self, **kwargs):
>>> > +        for kwarg in kwargs:
>>> > +            setattr(self, kwarg, kwargs[kwarg])
>>> > +
>>> > +
>>> > +def generate_payload(apps, schema_editor):
>>> > +    # NOTE(stephenfin): We could convert this to native SQL in the future
>>> > +    # by using e.g. 'JSON_OBJECT' for MySQL
>>> > +    EventBase = apps.get_model('patchwork', 'Event')
>>> > +
>>> > +    for event in EventBase.objects.all():
>>> > +        category = event.category
>>> > +        # TODO(stephenfin): This logic should be moved into the EventSerializer
>>> > +        # class
>>> > +        if category == Event.CATEGORY_COVER_CREATED:
>>> > +            payload_obj = Payload(cover=event.cover)
>>> > +        elif category == Event.CATEGORY_PATCH_CREATED:
>>> > +            payload_obj = Payload(patch=event.patch)
>>> > +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
>>> > +            payload_obj = Payload(patch=event.patch,
>>> > +                                  previous_state=event.previous_state,
>>> > +                                  current_state=event.previous_state)
>>> > +        elif category == Event.CATEGORY_PATCH_DELEGATED:
>>> > +            payload_obj = Payload(patch=event.patch,
>>> > +                                  previous_delegate=event.previous_delegate,
>>> > +                                  current_delegate=event.current_delegate)
>>> > +        elif category == Event.CATEGORY_PATCH_COMPLETED:
>>> > +            payload_obj = Payload(patch=event.patch)
>>> > +        elif category == Event.CATEGORY_CHECK_CREATED:
>>> > +            payload_obj = Payload(patch=event.patch, check=event.created_check)
>>> > +        elif category == Event.CATEGORY_SERIES_CREATED:
>>> > +            payload_obj = Payload(series=event.series)
>>> > +        elif category == Event.CATEGORY_SERIES_COMPLETED:
>>> > +            payload_obj = Payload(series=event.series)
>>> > +
>>> > +        payload = PayloadSerializer(payload_obj).data
>
> Converting this to json.dumps(PayloadSerializer(payload_obj).data)
> WFM. It now 'only' uses about a third of my memory for 318k events.
>
> The results seem to render although they are more nested than I remember:
>
> {
>         "id": 318637,
>         "category": "patch-completed",
>         "project": {
>             "id": 2,
>             "url": "http://localhost:8000/api/projects/2/",
>             "name": "Kernel Team",
>             "link_name": "kernel-team",
>             "list_id": "kernel-team.lists.ubuntu.com",
>             "list_email": "kernel-team@lists.ubuntu.com",
>             "web_url": "",
>             "scm_url": "",
>             "webscm_url": ""
>         },
>         "date": "2018-04-10T15:24:19.650185",
>         "payload": {
>             "patch": {
>                 "id": 105448,
>                 "url": "http://localhost:8000/api/patches/105448/",
>                 "msgid": "<20170828081014.24028-1-po-hsu.lin@canonical.com>",
>                 "date": "2017-08-28T08:10:14",
>                 "name": "[CVE-2016-10200,SRU,Trusty] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{, 6}_bind()",
>                 "mbox": "http://localhost:8000/patch/105448/mbox/"
>             }
>         }
>     },
>
> ISTR they looked a bit different previously, although I can't remember
> if the details (here things like mbox) were previous at the top level or
> in some object.
>
> Timings to come.

Preliminary results with 100 request to the old (non-refactored +
prefetch_related) vs 100 requests to the new (this series):

Median time for old: 0.226s
            for new: 0.298s

So this appears to be slower. I haven't done any rigorous testing or
statistics though.

You can do better by changing select_related to prefetch_related. With
that, I see a median of 0.197s. I don't think the complexity this
patchset brings is justified by a ~30ms/req improvement though.

Regards,
Daniel
>
> Regards,
> Daniel
>
>>> > +        event.payload = payload
>>> > +        event.save()
>>> > +
>>> > +
>>> > +class Migration(migrations.Migration):
>>> > +
>>> > +    dependencies = [
>>> > +        ('patchwork', '0026_add_event_payload_field'),
>>> > +    ]
>>> > +
>>> > +    operations = [
>>> > +        migrations.RunPython(generate_payload, atomic=True),
>>> > +    ]
>>> 
>>> 
>>> I tried to run this migration on my laptop (16GB ram + swap) on a db
>>> with 102k patches. (The postgres sql dump is 781 MB).
>>> 
>>> It chewed up over 16GB of ram before being OOM killed.
>>> 
>>> This somewhat derailed my attempts to see if this offers any performance
>>> gain to justify the additional complexity.
>>
>> Darn, I'd tested this with the Patchwork archives and, while slow, it
>> did work. Guess it must be loading a lot of stuff into memory. I'll try
>> this again later in the week with a larger archive and see if I can
>> optimize anything.
>>
>>> BTW - I asked some questions last time around about versioning and
>>> dealing with any future changes. Did you have any thoughts on that? I
>>> think it's also a big issue.
>>
>> Right, sorry. I'll address those now.
>>
>> Stephen
>>
>>> Regards,
>>> Daniel
>>> 
>>> 
>>> > diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
>>> > new file mode 100644
>>> > index 00000000..2f7ba7bf
>>> > --- /dev/null
>>> > +++ b/patchwork/migrations/0028_remove_old_event_fields.py
>>> > @@ -0,0 +1,34 @@
>>> > +# -*- coding: utf-8 -*-
>>> > +from __future__ import unicode_literals
>>> > +
>>> > +from django.db import migrations
>>> > +
>>> > +
>>> > +class Migration(migrations.Migration):
>>> > +
>>> > +    dependencies = [
>>> > +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
>>> > +    ]
>>> > +
>>> > +    operations = [
>>> > +        migrations.RemoveField(
>>> > +            model_name='event',
>>> > +            name='created_check',
>>> > +        ),
>>> > +        migrations.RemoveField(
>>> > +            model_name='event',
>>> > +            name='current_delegate',
>>> > +        ),
>>> > +        migrations.RemoveField(
>>> > +            model_name='event',
>>> > +            name='current_state',
>>> > +        ),
>>> > +        migrations.RemoveField(
>>> > +            model_name='event',
>>> > +            name='previous_delegate',
>>> > +        ),
>>> > +        migrations.RemoveField(
>>> > +            model_name='event',
>>> > +            name='previous_state',
>>> > +        ),
>>> > +    ]
>>> > diff --git a/patchwork/models.py b/patchwork/models.py
>>> > index f91b994c..a5992d68 100644
>>> > --- a/patchwork/models.py
>>> > +++ b/patchwork/models.py
>>> > @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
>>> >  from patchwork.compat import is_authenticated
>>> >  from patchwork.compat import reverse
>>> >  from patchwork.fields import HashField
>>> > +from patchwork.fields import JSONField
>>> >  from patchwork.hasher import hash_diff
>>> >  
>>> >  if settings.ENABLE_REST_API:
>>> > @@ -943,32 +944,9 @@ class Event(models.Model):
>>> >          on_delete=models.CASCADE,
>>> >          help_text='The cover letter that this event was created for.')
>>> >  
>>> > -    # fields for 'patch-state-changed' events
>>> > +    # additional data
>>> >  
>>> > -    previous_state = models.ForeignKey(
>>> > -        State, related_name='+', null=True, blank=True,
>>> > -        on_delete=models.CASCADE)
>>> > -    current_state = models.ForeignKey(
>>> > -        State, related_name='+', null=True, blank=True,
>>> > -        on_delete=models.CASCADE)
>>> > -
>>> > -    # fields for 'patch-delegate-changed' events
>>> > -
>>> > -    previous_delegate = models.ForeignKey(
>>> > -        User, related_name='+', null=True, blank=True,
>>> > -        on_delete=models.CASCADE)
>>> > -    current_delegate = models.ForeignKey(
>>> > -        User, related_name='+', null=True, blank=True,
>>> > -        on_delete=models.CASCADE)
>>> > -
>>> > -    # fields or 'patch-check-created' events
>>> > -
>>> > -    created_check = models.ForeignKey(
>>> > -        Check, related_name='+', null=True, blank=True,
>>> > -        on_delete=models.CASCADE)
>>> > -
>>> > -    # TODO(stephenfin): Validate that the correct fields are being set by way
>>> > -    # of a 'clean' method
>>> > +    payload = JSONField(null=True, blank=True)
>>> >  
>>> >      def __repr__(self):
>>> >          return "<Event id='%d' category='%s'" % (self.id, self.category)
>>> > diff --git a/patchwork/signals.py b/patchwork/signals.py
>>> > index b083b51a..c0f4c08d 100644
>>> > --- a/patchwork/signals.py
>>> > +++ b/patchwork/signals.py
>>> > @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
>>> >  from django.db.models.signals import pre_save
>>> >  from django.dispatch import receiver
>>> >  
>>> > +from patchwork.api.event import PayloadSerializer
>>> >  from patchwork.models import Check
>>> >  from patchwork.models import CoverLetter
>>> >  from patchwork.models import Event
>>> > @@ -33,6 +34,14 @@ from patchwork.models import Series
>>> >  from patchwork.models import SeriesPatch
>>> >  
>>> >  
>>> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
>>> > +class Payload(object):
>>> > +
>>> > +    def __init__(self, **kwargs):
>>> > +        for kwarg in kwargs:
>>> > +            setattr(self, kwarg, kwargs[kwarg])
>>> > +
>>> > +
>>> >  @receiver(pre_save, sender=Patch)
>>> >  def patch_change_callback(sender, instance, raw, **kwargs):
>>> >      # we only want notification of modified patches
>>> > @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
>>> >  def create_cover_created_event(sender, instance, created, raw, **kwargs):
>>> >  
>>> >      def create_event(cover):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            cover=cover))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_COVER_CREATED,
>>> >              project=cover.project,
>>> > +            payload=payload.data,
>>> >              cover=cover)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or new items
>>> > @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
>>> >  def create_patch_created_event(sender, instance, created, raw, **kwargs):
>>> >  
>>> >      def create_event(patch):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            patch=patch))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_PATCH_CREATED,
>>> >              project=patch.project,
>>> > +            payload=payload.data,
>>> >              patch=patch)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or new items
>>> > @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
>>> >  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>>> >  
>>> >      def create_event(patch, before, after):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            patch=patch,
>>> > +            previous_state=before,
>>> > +            current_state=after))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_PATCH_STATE_CHANGED,
>>> >              project=patch.project,
>>> > -            patch=patch,
>>> > -            previous_state=before,
>>> > -            current_state=after)
>>> > +            payload=payload.data,
>>> > +            patch=patch)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or new items
>>> >      if raw or not instance.pk:
>>> > @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>>> >  def create_patch_delegated_event(sender, instance, raw, **kwargs):
>>> >  
>>> >      def create_event(patch, before, after):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            patch=patch,
>>> > +            previous_delegate=before,
>>> > +            current_delegate=after))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_PATCH_DELEGATED,
>>> >              project=patch.project,
>>> > -            patch=patch,
>>> > -            previous_delegate=before,
>>> > -            current_delegate=after)
>>> > +            payload=payload.data,
>>> > +            patch=patch)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or new items
>>> >      if raw or not instance.pk:
>>> > @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>>> >      """Create patch completed event for patches with series."""
>>> >  
>>> >      def create_event(patch, series):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            patch=patch,
>>> > +            series=series))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_PATCH_COMPLETED,
>>> >              project=patch.project,
>>> > +            payload=payload.data,
>>> >              patch=patch,
>>> >              series=series)
>>> >  
>>> > @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>>> >  def create_check_created_event(sender, instance, created, raw, **kwargs):
>>> >  
>>> >      def create_event(check):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            patch=check.patch,
>>> > +            check=check))
>>> > +
>>> >          # TODO(stephenfin): It might make sense to add a 'project' field to
>>> >          # 'check' to prevent lookups here and in the REST API
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_CHECK_CREATED,
>>> >              project=check.patch.project,
>>> > -            patch=check.patch,
>>> > -            created_check=check)
>>> > +            payload=payload.data,
>>> > +            patch=check.patch)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or existing items
>>> >      if raw or not created:
>>> > @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
>>> >  def create_series_created_event(sender, instance, created, raw, **kwargs):
>>> >  
>>> >      def create_event(series):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            series=series))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_SERIES_CREATED,
>>> >              project=series.project,
>>> > +            payload=payload.data,
>>> >              series=series)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or existing items
>>> > @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
>>> >      # in that case.
>>> >  
>>> >      def create_event(series):
>>> > +        payload = PayloadSerializer(Payload(
>>> > +            series=series))
>>> > +
>>> >          return Event.objects.create(
>>> >              category=Event.CATEGORY_SERIES_COMPLETED,
>>> >              project=series.project,
>>> > +            payload=payload.data,
>>> >              series=series)
>>> >  
>>> >      # don't trigger for items loaded from fixtures or existing items
>>> > diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
>>> > index 70d563de..39dff6f1 100644
>>> > --- a/patchwork/tests/test_events.py
>>> > +++ b/patchwork/tests/test_events.py
>>> > @@ -17,32 +17,20 @@
>>> >  # along with Patchwork; if not, write to the Free Software
>>> >  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>>> >  
>>> > +from collections import OrderedDict
>>> > +
>>> >  from django.test import TestCase
>>> >  
>>> >  from patchwork.models import Event
>>> >  from patchwork.tests import utils
>>> >  
>>> > -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
>>> > -               'current_delegate']
>>> > -
>>> >  
>>> >  def _get_events(**filters):
>>> >      # These are sorted by reverse normally, so reverse it once again
>>> >      return Event.objects.filter(**filters).order_by('date')
>>> >  
>>> >  
>>> > -class _BaseTestCase(TestCase):
>>> > -
>>> > -    def assertEventFields(self, event, parent_type='patch', **fields):
>>> > -        for field_name in [x for x in BASE_FIELDS]:
>>> > -            field = getattr(event, field_name)
>>> > -            if field_name in fields:
>>> > -                self.assertEqual(field, fields[field_name])
>>> > -            else:
>>> > -                self.assertIsNone(field)
>>> > -
>>> > -
>>> > -class PatchCreateTest(_BaseTestCase):
>>> > +class PatchCreateTest(TestCase):
>>> >  
>>> >      def test_patch_created(self):
>>> >          """No series, so patch dependencies implicitly exist."""
>>> > @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
>>> >          # This should raise both the CATEGORY_PATCH_CREATED and
>>> >          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
>>> >          events = _get_events(patch=patch)
>>> > +
>>> >          self.assertEqual(events.count(), 1)
>>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>>> >          self.assertEqual(events[0].project, patch.project)
>>> > -        self.assertEventFields(events[0])
>>> > +
>>> > +        payload = events[0].payload
>>> > +        self.assertIsInstance(payload, OrderedDict)
>>> > +        self.assertIn('patch', payload)
>>> > +        self.assertEqual(payload['patch']['id'], patch.id)
>>> >  
>>> >      def test_patch_dependencies_present_series(self):
>>> >          """Patch dependencies already exist."""
>>> > @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
>>> >          # This should raise both the CATEGORY_PATCH_CREATED and
>>> >          # CATEGORY_PATCH_COMPLETED events
>>> >          events = _get_events(patch=series_patch.patch)
>>> > +
>>> >          self.assertEqual(events.count(), 2)
>>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>>> >          self.assertEqual(events[0].project, series_patch.patch.project)
>>> >          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
>>> >          self.assertEqual(events[1].project, series_patch.patch.project)
>>> > -        self.assertEventFields(events[0])
>>> > -        self.assertEventFields(events[1])
>>> > +
>>> > +        payload = events[0].payload
>>> > +        self.assertIn('patch', payload)
>>> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
>>> > +
>>> > +        payload = events[1].payload
>>> > +        self.assertIn('patch', payload)
>>> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
>>> > +        self.assertIn('series', payload)
>>> > +        self.assertEqual(payload['series']['id'], series_patch.series.id)
>>> >  
>>> >      def test_patch_dependencies_out_of_order(self):
>>> >          series = utils.create_series()
>>> > @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
>>> >              events = _get_events(patch=series_patch.patch)
>>> >              self.assertEqual(events.count(), 1)
>>> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>>> > -            self.assertEventFields(events[0])
>>> >  
>>> >          series_patch_1 = utils.create_series_patch(series=series, number=1)
>>> >  
>>> > @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
>>> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>>> >              self.assertEqual(events[1].category,
>>> >                               Event.CATEGORY_PATCH_COMPLETED)
>>> > -            self.assertEventFields(events[0])
>>> > -            self.assertEventFields(events[1])
>>> >  
>>> >      def test_patch_dependencies_missing(self):
>>> >          series_patch = utils.create_series_patch(number=2)
>>> > @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
>>> >          events = _get_events(patch=series_patch.patch)
>>> >          self.assertEqual(events.count(), 1)
>>> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>>> > -        self.assertEventFields(events[0])
>>> >  
>>> >  
>>> > -class PatchChangedTest(_BaseTestCase):
>>> > +class PatchChangedTest(TestCase):
>>> >  
>>> >      def test_patch_state_changed(self):
>>> >          patch = utils.create_patch()
>>> > @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
>>> >          patch.save()
>>> >  
>>> >          events = _get_events(patch=patch)
>>> > +
>>> >          self.assertEqual(events.count(), 2)
>>> >          # we don't care about the CATEGORY_PATCH_CREATED event here
>>> >          self.assertEqual(events[1].category,
>>> >                           Event.CATEGORY_PATCH_STATE_CHANGED)
>>> >          self.assertEqual(events[1].project, patch.project)
>>> > -        self.assertEventFields(events[1], previous_state=old_state,
>>> > -                               current_state=new_state)
>>> > +
>>> > +        payload = events[1].payload
>>> > +        self.assertIn('previous_state', payload)
>>> > +        self.assertEqual(payload['previous_state'], old_state.slug)
>>> > +        self.assertIn('current_state', payload)
>>> > +        self.assertEqual(payload['current_state'], new_state.slug)
>>> >  
>>> >      def test_patch_delegated(self):
>>> >          patch = utils.create_patch()
>>> > @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
>>> >          patch.save()
>>> >  
>>> >          events = _get_events(patch=patch)
>>> > +
>>> >          self.assertEqual(events.count(), 2)
>>> >          # we don't care about the CATEGORY_PATCH_CREATED event here
>>> >          self.assertEqual(events[1].category,
>>> >                           Event.CATEGORY_PATCH_DELEGATED)
>>> >          self.assertEqual(events[1].project, patch.project)
>>> > -        self.assertEventFields(events[1], current_delegate=delegate_a)
>>> > +
>>> > +        payload = events[1].payload
>>> > +        self.assertIn('previous_delegate', payload)
>>> > +        self.assertIsNone(payload['previous_delegate'])
>>> > +        self.assertIn('current_delegate', payload)
>>> > +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
>>> >  
>>> >          delegate_b = utils.create_user()
>>> >  
>>> > @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
>>> >          patch.save()
>>> >  
>>> >          events = _get_events(patch=patch)
>>> > +
>>> >          self.assertEqual(events.count(), 3)
>>> >          self.assertEqual(events[2].category,
>>> >                           Event.CATEGORY_PATCH_DELEGATED)
>>> > -        self.assertEventFields(events[2], previous_delegate=delegate_a,
>>> > -                               current_delegate=delegate_b)
>>> > +
>>> > +        payload = events[2].payload
>>> > +        self.assertIn('previous_delegate', payload)
>>> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
>>> > +        self.assertIn('current_delegate', payload)
>>> > +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
>>> >  
>>> >          # Delegate B -> None
>>> >  
>>> > @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
>>> >          patch.save()
>>> >  
>>> >          events = _get_events(patch=patch)
>>> > +
>>> >          self.assertEqual(events.count(), 4)
>>> >          self.assertEqual(events[3].category,
>>> >                           Event.CATEGORY_PATCH_DELEGATED)
>>> > -        self.assertEventFields(events[3], previous_delegate=delegate_b)
>>> >  
>>> > +        payload = events[3].payload
>>> > +        self.assertIn('previous_delegate', payload)
>>> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
>>> > +        self.assertIn('current_delegate', payload)
>>> > +        self.assertIsNone(payload['current_delegate'])
>>> >  
>>> > -class CheckCreateTest(_BaseTestCase):
>>> > +
>>> > +class CheckCreateTest(TestCase):
>>> >  
>>> >      def test_check_created(self):
>>> >          check = utils.create_check()
>>> > -        events = _get_events(created_check=check)
>>> > -        self.assertEqual(events.count(), 1)
>>> > -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
>>> > -        self.assertEqual(events[0].project, check.patch.project)
>>> > -        self.assertEventFields(events[0])
>>> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
>>> > +        events = _get_events(patch=check.patch)
>>> >  
>>> > +        self.assertEqual(events.count(), 2)
>>> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
>>> > +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
>>> > +        self.assertEqual(events[1].project, check.patch.project)
>>> > +
>>> > +        payload = events[1].payload
>>> > +        self.assertIn('check', payload)
>>> > +        self.assertEqual(payload['check']['id'], check.id)
>>> >  
>>> > -class CoverCreateTest(_BaseTestCase):
>>> > +
>>> > +class CoverCreateTest(TestCase):
>>> >  
>>> >      def test_cover_created(self):
>>> >          cover = utils.create_cover()
>>> > @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
>>> >          self.assertEqual(events.count(), 1)
>>> >          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
>>> >          self.assertEqual(events[0].project, cover.project)
>>> > -        self.assertEventFields(events[0])
>>> > +
>>> > +        payload = events[0].payload
>>> > +        self.assertIn('cover', payload)
>>> > +        self.assertEqual(payload['cover']['id'], cover.id)
>>> >  
>>> >  
>>> > -class SeriesCreateTest(_BaseTestCase):
>>> > +class SeriesCreateTest(TestCase):
>>> >  
>>> >      def test_series_created(self):
>>> >          series = utils.create_series()
>>> > @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
>>> >          self.assertEqual(events.count(), 1)
>>> >          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
>>> >          self.assertEqual(events[0].project, series.project)
>>> > -        self.assertEventFields(events[0])
>>> > +
>>> > +        payload = events[0].payload
>>> > +        self.assertIn('series', payload)
>>> > +        self.assertEqual(payload['series']['id'], series.id)
>>> > -- 
>>> > 2.14.3
>>> > 
>>> > _______________________________________________
>>> > Patchwork mailing list
>>> > Patchwork@lists.ozlabs.org
>>> > https://lists.ozlabs.org/listinfo/patchwork
Stephen Finucane April 12, 2018, 11:50 a.m. UTC | #6
On Thu, 2018-04-12 at 11:50 +1000, Daniel Axtens wrote:
> Daniel Axtens <dja@axtens.net> writes:
> > Stephen Finucane <stephen@that.guru> writes:
> > > On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote:

[snip]

> > > > > +def generate_payload(apps, schema_editor):
> > > > > +    # NOTE(stephenfin): We could convert this to native SQL in the future
> > > > > +    # by using e.g. 'JSON_OBJECT' for MySQL
> > > > > +    EventBase = apps.get_model('patchwork', 'Event')
> > > > > +
> > > > > +    for event in EventBase.objects.all():
> > > > > +        category = event.category
> > > > > +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> > > > > +        # class
> > > > > +        if category == Event.CATEGORY_COVER_CREATED:
> > > > > +            payload_obj = Payload(cover=event.cover)
> > > > > +        elif category == Event.CATEGORY_PATCH_CREATED:
> > > > > +            payload_obj = Payload(patch=event.patch)
> > > > > +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> > > > > +            payload_obj = Payload(patch=event.patch,
> > > > > +                                  previous_state=event.previous_state,
> > > > > +                                  current_state=event.previous_state)
> > > > > +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> > > > > +            payload_obj = Payload(patch=event.patch,
> > > > > +                                  previous_delegate=event.previous_delegate,
> > > > > +                                  current_delegate=event.current_delegate)
> > > > > +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> > > > > +            payload_obj = Payload(patch=event.patch)
> > > > > +        elif category == Event.CATEGORY_CHECK_CREATED:
> > > > > +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> > > > > +        elif category == Event.CATEGORY_SERIES_CREATED:
> > > > > +            payload_obj = Payload(series=event.series)
> > > > > +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> > > > > +            payload_obj = Payload(series=event.series)
> > > > > +
> > > > > +        payload = PayloadSerializer(payload_obj).data
> > 
> > Converting this to json.dumps(PayloadSerializer(payload_obj).data)
> > WFM. It now 'only' uses about a third of my memory for 318k events.
> > 
> > The results seem to render although they are more nested than I remember:
> > 
> > {
> >         "id": 318637,
> >         "category": "patch-completed",
> >         "project": {
> >             "id": 2,
> >             "url": "http://localhost:8000/api/projects/2/",
> >             "name": "Kernel Team",
> >             "link_name": "kernel-team",
> >             "list_id": "kernel-team.lists.ubuntu.com",
> >             "list_email": "kernel-team@lists.ubuntu.com",
> >             "web_url": "",
> >             "scm_url": "",
> >             "webscm_url": ""
> >         },
> >         "date": "2018-04-10T15:24:19.650185",
> >         "payload": {
> >             "patch": {
> >                 "id": 105448,
> >                 "url": "http://localhost:8000/api/patches/105448/",
> >                 "msgid": "<20170828081014.24028-1-po-hsu.lin@canonical.com>",
> >                 "date": "2017-08-28T08:10:14",
> >                 "name": "[CVE-2016-10200,SRU,Trusty] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{, 6}_bind()",
> >                 "mbox": "http://localhost:8000/patch/105448/mbox/"
> >             }
> >         }
> >     },
> > 
> > ISTR they looked a bit different previously, although I can't remember
> > if the details (here things like mbox) were previous at the top level or
> > in some object.
> > 
> > Timings to come.
> 
> Preliminary results with 100 request to the old (non-refactored +
> prefetch_related) vs 100 requests to the new (this series):
> 
> Median time for old: 0.226s
>             for new: 0.298s
> 
> So this appears to be slower. I haven't done any rigorous testing or
> statistics though.
> 
> You can do better by changing select_related to prefetch_related. With
> that, I see a median of 0.197s. I don't think the complexity this
> patchset brings is justified by a ~30ms/req improvement though.

Totally agree. Wow, I guess the select_related -> prefetch_related and
JSON renderer changes made a bigger difference than I could have
imagined. I'm happy to drop most of this. Nice work!

The only aspect I would like to keep is a variant of the last two bits.
From my brief testing, it seems like it's the filtering that's causing
the huge queries - not the JSON renderer itself. If we can disable this
from the web UI or, better again, switch to a AJAX-driven system, we'd
be able to speed up not only '/events' but everything else. Thoughts?

Stephen
Daniel Axtens April 12, 2018, 3:15 p.m. UTC | #7
>> Preliminary results with 100 request to the old (non-refactored +
>> prefetch_related) vs 100 requests to the new (this series):
>> 
>> Median time for old: 0.226s
>>             for new: 0.298s
>> 
>> So this appears to be slower. I haven't done any rigorous testing or
>> statistics though.
>> 
>> You can do better by changing select_related to prefetch_related. With
>> that, I see a median of 0.197s. I don't think the complexity this
>> patchset brings is justified by a ~30ms/req improvement though.
>
> Totally agree. Wow, I guess the select_related -> prefetch_related and
> JSON renderer changes made a bigger difference than I could have
> imagined. I'm happy to drop most of this. Nice work!
>
> The only aspect I would like to keep is a variant of the last two bits.
> From my brief testing, it seems like it's the filtering that's causing
> the huge queries - not the JSON renderer itself. If we can disable this
> from the web UI or, better again, switch to a AJAX-driven system, we'd
> be able to speed up not only '/events' but everything else. Thoughts?

I'm not exactly sure what parts constitute the "last two bits". But, I'm
mostly concerned with changes that touch the database schema, so if it
doesn't do that I'm probably fine. If you can identify the bits you are
talking about I'm happy to review them more closely.

Regards,
Daniel

>
> Stephen
diff mbox series

Patch

diff --git a/patchwork/api/event.py b/patchwork/api/event.py
index 9879a9f6..482dec1e 100644
--- a/patchwork/api/event.py
+++ b/patchwork/api/event.py
@@ -17,14 +17,15 @@ 
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-from collections import OrderedDict
 import json
 
 from rest_framework.generics import ListAPIView
+from rest_framework.reverse import reverse
 from rest_framework.serializers import ModelSerializer
-from rest_framework.serializers import SerializerMethodField
 from rest_framework.renderers import JSONRenderer
 from rest_framework.renderers import TemplateHTMLRenderer
+from rest_framework.serializers import ReadOnlyField
+from rest_framework.serializers import Serializer
 
 from patchwork.api.embedded import CheckSerializer
 from patchwork.api.embedded import CoverLetterSerializer
@@ -33,58 +34,81 @@  from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.api.embedded import UserSerializer
 from patchwork.api.filters import EventFilter
-from patchwork.api.patch import StateField
 from patchwork.models import Event
 
 
-class EventSerializer(ModelSerializer):
+class StateField(ReadOnlyField):
 
-    project = ProjectSerializer(read_only=True)
-    patch = PatchSerializer(read_only=True)
-    series = SeriesSerializer(read_only=True)
-    cover = CoverLetterSerializer(read_only=True)
+    def to_representation(self, obj):
+        return '-'.join(obj.name.lower().split())
+
+
+class PayloadField(ReadOnlyField):
+
+    def to_representation(self, value):
+        request = self.context['request']
+
+        # set the 'url' and 'mbox' fields for any embedded 'cover',
+        # 'patch', and 'series' fields
+        for field in ('cover', 'patch', 'series'):
+            if not value.get(field):
+                continue
+
+            for subfield, lookup, url_name in (
+                    ('url', 'pk', 'api-{}-detail'),
+                    ('mbox', '{}_id', '{}-mbox')):
+                value[field][subfield] = reverse(
+                    url_name.format(field),
+                    request=request,
+                    kwargs={
+                        lookup.format(field): value[field]['id'],
+                    })
+
+        # set the 'url' field for any embedded '*_delegate' fields
+        for field in ('previous_delegate', 'current_delegate'):
+            if not value.get(field):
+                continue
+
+            value[field]['url'] = reverse(
+                'api-user-detail',
+                request=request,
+                kwargs={
+                    'pk': value[field]['id']
+                })
+
+        # set the 'url' field for any embedded 'check' fields
+        if value.get('check'):
+            value['check']['url'] = reverse(
+                'api-check-detail',
+                request=request,
+                kwargs={
+                    'patch_id': value['patch']['id'],
+                    'check_id': value['check']['id']
+                })
+
+        return value
+
+
+class PayloadSerializer(Serializer):
+
+    cover = CoverLetterSerializer(required=False)
+    patch = PatchSerializer(required=False)
+    series = SeriesSerializer(required=False)
     previous_state = StateField()
     current_state = StateField()
-    previous_delegate = UserSerializer()
-    current_delegate = UserSerializer()
-    created_check = SerializerMethodField()
-    created_check = CheckSerializer()
-
-    _category_map = {
-        Event.CATEGORY_COVER_CREATED: ['cover'],
-        Event.CATEGORY_PATCH_CREATED: ['patch'],
-        Event.CATEGORY_PATCH_COMPLETED: ['patch', 'series'],
-        Event.CATEGORY_PATCH_STATE_CHANGED: ['patch', 'previous_state',
-                                             'current_state'],
-        Event.CATEGORY_PATCH_DELEGATED: ['patch', 'previous_delegate',
-                                         'current_delegate'],
-        Event.CATEGORY_CHECK_CREATED: ['patch', 'created_check'],
-        Event.CATEGORY_SERIES_CREATED: ['series'],
-        Event.CATEGORY_SERIES_COMPLETED: ['series'],
-    }
-
-    def to_representation(self, instance):
-        data = super(EventSerializer, self).to_representation(instance)
-        payload = OrderedDict()
-        kept_fields = self._category_map[instance.category] + [
-            'id', 'category', 'project', 'date']
-
-        for field in [x for x in data]:
-            if field not in kept_fields:
-                del data[field]
-            elif field in self._category_map[instance.category]:
-                field_name = 'check' if field == 'created_check' else field
-                payload[field_name] = data.pop(field)
-
-        data['payload'] = payload
-
-        return data
+    previous_delegate = UserSerializer(required=False)
+    current_delegate = UserSerializer(required=False)
+    check = CheckSerializer(required=False)
+
+
+class EventSerializer(ModelSerializer):
+
+    project = ProjectSerializer(read_only=True)
+    payload = PayloadField()
 
     class Meta:
         model = Event
-        fields = ('id', 'category', 'project', 'date', 'patch', 'series',
-                  'cover', 'previous_state', 'current_state',
-                  'previous_delegate', 'current_delegate', 'created_check')
+        fields = ('id', 'category', 'project', 'date', 'payload')
         read_only_fields = fields
 
 
@@ -110,8 +134,4 @@  class EventList(ListAPIView):
     ordering = '-date'
 
     def get_queryset(self):
-        return Event.objects.all()\
-            .prefetch_related('project', 'patch', 'series', 'cover',
-                              'previous_state', 'current_state',
-                              'previous_delegate', 'current_delegate',
-                              'created_check')
+        return Event.objects.all().select_related('project')
diff --git a/patchwork/fields.py b/patchwork/fields.py
index 502558be..0ec5fe95 100644
--- a/patchwork/fields.py
+++ b/patchwork/fields.py
@@ -20,7 +20,9 @@ 
 
 from __future__ import absolute_import
 
+from collections import OrderedDict
 import hashlib
+import json
 
 from django.db import models
 from django.utils import six
@@ -44,3 +46,47 @@  class HashField(models.CharField):
 
     def db_type(self, connection=None):
         return 'char(%d)' % self.n_bytes
+
+
+class JSONField(models.Field):
+    """A basic field for loading/storing JSON.
+
+    We use this rather than Django's own JSONField because we don't need to
+    index this field and said field does not support MySQL yet.
+    """
+
+    def get_internal_type(self):
+        return 'TextField'
+
+    def get_prep_value(self, value):
+        # This is necessary for Django 1.8.x, which called 'smart_text' from
+        # 'django.utils.encoding' on all data
+        return value
+
+    def get_db_prep_value(self, value, connection, prepared=False):
+        value = super(JSONField, self).get_db_prep_value(
+            value, connection, prepared)
+        return json.dumps(value)
+
+    def from_db_value(self, value, *args, **kwargs):
+        if value is None:
+            return value
+
+        return json.loads(value, object_pairs_hook=OrderedDict)
+
+    def db_type(self, connection):
+        if connection.vendor == 'postgresql':
+            if connection.pg_version >= 90400:
+                return 'jsonb'
+            return 'text'
+        if connection.vendor == 'mysql':
+            if connection.mysql_version >= (5, 7, 8):
+                return 'json'
+            return 'longtext'
+        return 'text'
+
+    def to_python(self, value):
+        if value is None or isinstance(value, OrderedDict):
+            return value
+
+        return json.loads(value, object_pairs_hook=OrderedDict)
diff --git a/patchwork/migrations/0026_add_event_payload_field.py b/patchwork/migrations/0026_add_event_payload_field.py
new file mode 100644
index 00000000..adbdfe2a
--- /dev/null
+++ b/patchwork/migrations/0026_add_event_payload_field.py
@@ -0,0 +1,21 @@ 
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+import patchwork.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('patchwork', '0025_add_regex_validators'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='event',
+            name='payload',
+            field=patchwork.fields.JSONField(blank=True, null=True),
+        ),
+    ]
diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
new file mode 100644
index 00000000..5dca85c5
--- /dev/null
+++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
@@ -0,0 +1,61 @@ 
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+from patchwork.api.event import PayloadSerializer
+from patchwork.models import Event
+
+
+# TODO(stephenfin): Move this to 'api/event' and into Serializer
+class Payload(object):
+
+    def __init__(self, **kwargs):
+        for kwarg in kwargs:
+            setattr(self, kwarg, kwargs[kwarg])
+
+
+def generate_payload(apps, schema_editor):
+    # NOTE(stephenfin): We could convert this to native SQL in the future
+    # by using e.g. 'JSON_OBJECT' for MySQL
+    EventBase = apps.get_model('patchwork', 'Event')
+
+    for event in EventBase.objects.all():
+        category = event.category
+        # TODO(stephenfin): This logic should be moved into the EventSerializer
+        # class
+        if category == Event.CATEGORY_COVER_CREATED:
+            payload_obj = Payload(cover=event.cover)
+        elif category == Event.CATEGORY_PATCH_CREATED:
+            payload_obj = Payload(patch=event.patch)
+        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
+            payload_obj = Payload(patch=event.patch,
+                                  previous_state=event.previous_state,
+                                  current_state=event.previous_state)
+        elif category == Event.CATEGORY_PATCH_DELEGATED:
+            payload_obj = Payload(patch=event.patch,
+                                  previous_delegate=event.previous_delegate,
+                                  current_delegate=event.current_delegate)
+        elif category == Event.CATEGORY_PATCH_COMPLETED:
+            payload_obj = Payload(patch=event.patch)
+        elif category == Event.CATEGORY_CHECK_CREATED:
+            payload_obj = Payload(patch=event.patch, check=event.created_check)
+        elif category == Event.CATEGORY_SERIES_CREATED:
+            payload_obj = Payload(series=event.series)
+        elif category == Event.CATEGORY_SERIES_COMPLETED:
+            payload_obj = Payload(series=event.series)
+
+        payload = PayloadSerializer(payload_obj).data
+        event.payload = payload
+        event.save()
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('patchwork', '0026_add_event_payload_field'),
+    ]
+
+    operations = [
+        migrations.RunPython(generate_payload, atomic=True),
+    ]
diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
new file mode 100644
index 00000000..2f7ba7bf
--- /dev/null
+++ b/patchwork/migrations/0028_remove_old_event_fields.py
@@ -0,0 +1,34 @@ 
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='event',
+            name='created_check',
+        ),
+        migrations.RemoveField(
+            model_name='event',
+            name='current_delegate',
+        ),
+        migrations.RemoveField(
+            model_name='event',
+            name='current_state',
+        ),
+        migrations.RemoveField(
+            model_name='event',
+            name='previous_delegate',
+        ),
+        migrations.RemoveField(
+            model_name='event',
+            name='previous_state',
+        ),
+    ]
diff --git a/patchwork/models.py b/patchwork/models.py
index f91b994c..a5992d68 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -37,6 +37,7 @@  from django.utils.functional import cached_property
 from patchwork.compat import is_authenticated
 from patchwork.compat import reverse
 from patchwork.fields import HashField
+from patchwork.fields import JSONField
 from patchwork.hasher import hash_diff
 
 if settings.ENABLE_REST_API:
@@ -943,32 +944,9 @@  class Event(models.Model):
         on_delete=models.CASCADE,
         help_text='The cover letter that this event was created for.')
 
-    # fields for 'patch-state-changed' events
+    # additional data
 
-    previous_state = models.ForeignKey(
-        State, related_name='+', null=True, blank=True,
-        on_delete=models.CASCADE)
-    current_state = models.ForeignKey(
-        State, related_name='+', null=True, blank=True,
-        on_delete=models.CASCADE)
-
-    # fields for 'patch-delegate-changed' events
-
-    previous_delegate = models.ForeignKey(
-        User, related_name='+', null=True, blank=True,
-        on_delete=models.CASCADE)
-    current_delegate = models.ForeignKey(
-        User, related_name='+', null=True, blank=True,
-        on_delete=models.CASCADE)
-
-    # fields or 'patch-check-created' events
-
-    created_check = models.ForeignKey(
-        Check, related_name='+', null=True, blank=True,
-        on_delete=models.CASCADE)
-
-    # TODO(stephenfin): Validate that the correct fields are being set by way
-    # of a 'clean' method
+    payload = JSONField(null=True, blank=True)
 
     def __repr__(self):
         return "<Event id='%d' category='%s'" % (self.id, self.category)
diff --git a/patchwork/signals.py b/patchwork/signals.py
index b083b51a..c0f4c08d 100644
--- a/patchwork/signals.py
+++ b/patchwork/signals.py
@@ -24,6 +24,7 @@  from django.db.models.signals import post_save
 from django.db.models.signals import pre_save
 from django.dispatch import receiver
 
+from patchwork.api.event import PayloadSerializer
 from patchwork.models import Check
 from patchwork.models import CoverLetter
 from patchwork.models import Event
@@ -33,6 +34,14 @@  from patchwork.models import Series
 from patchwork.models import SeriesPatch
 
 
+# TODO(stephenfin): Move this to 'api/event' and into Serializer
+class Payload(object):
+
+    def __init__(self, **kwargs):
+        for kwarg in kwargs:
+            setattr(self, kwarg, kwargs[kwarg])
+
+
 @receiver(pre_save, sender=Patch)
 def patch_change_callback(sender, instance, raw, **kwargs):
     # we only want notification of modified patches
@@ -73,9 +82,13 @@  def patch_change_callback(sender, instance, raw, **kwargs):
 def create_cover_created_event(sender, instance, created, raw, **kwargs):
 
     def create_event(cover):
+        payload = PayloadSerializer(Payload(
+            cover=cover))
+
         return Event.objects.create(
             category=Event.CATEGORY_COVER_CREATED,
             project=cover.project,
+            payload=payload.data,
             cover=cover)
 
     # don't trigger for items loaded from fixtures or new items
@@ -88,9 +101,13 @@  def create_cover_created_event(sender, instance, created, raw, **kwargs):
 def create_patch_created_event(sender, instance, created, raw, **kwargs):
 
     def create_event(patch):
+        payload = PayloadSerializer(Payload(
+            patch=patch))
+
         return Event.objects.create(
             category=Event.CATEGORY_PATCH_CREATED,
             project=patch.project,
+            payload=payload.data,
             patch=patch)
 
     # don't trigger for items loaded from fixtures or new items
@@ -103,12 +120,16 @@  def create_patch_created_event(sender, instance, created, raw, **kwargs):
 def create_patch_state_changed_event(sender, instance, raw, **kwargs):
 
     def create_event(patch, before, after):
+        payload = PayloadSerializer(Payload(
+            patch=patch,
+            previous_state=before,
+            current_state=after))
+
         return Event.objects.create(
             category=Event.CATEGORY_PATCH_STATE_CHANGED,
             project=patch.project,
-            patch=patch,
-            previous_state=before,
-            current_state=after)
+            payload=payload.data,
+            patch=patch)
 
     # don't trigger for items loaded from fixtures or new items
     if raw or not instance.pk:
@@ -125,12 +146,16 @@  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
 def create_patch_delegated_event(sender, instance, raw, **kwargs):
 
     def create_event(patch, before, after):
+        payload = PayloadSerializer(Payload(
+            patch=patch,
+            previous_delegate=before,
+            current_delegate=after))
+
         return Event.objects.create(
             category=Event.CATEGORY_PATCH_DELEGATED,
             project=patch.project,
-            patch=patch,
-            previous_delegate=before,
-            current_delegate=after)
+            payload=payload.data,
+            patch=patch)
 
     # don't trigger for items loaded from fixtures or new items
     if raw or not instance.pk:
@@ -148,9 +173,14 @@  def create_patch_completed_event(sender, instance, created, raw, **kwargs):
     """Create patch completed event for patches with series."""
 
     def create_event(patch, series):
+        payload = PayloadSerializer(Payload(
+            patch=patch,
+            series=series))
+
         return Event.objects.create(
             category=Event.CATEGORY_PATCH_COMPLETED,
             project=patch.project,
+            payload=payload.data,
             patch=patch,
             series=series)
 
@@ -182,13 +212,17 @@  def create_patch_completed_event(sender, instance, created, raw, **kwargs):
 def create_check_created_event(sender, instance, created, raw, **kwargs):
 
     def create_event(check):
+        payload = PayloadSerializer(Payload(
+            patch=check.patch,
+            check=check))
+
         # TODO(stephenfin): It might make sense to add a 'project' field to
         # 'check' to prevent lookups here and in the REST API
         return Event.objects.create(
             category=Event.CATEGORY_CHECK_CREATED,
             project=check.patch.project,
-            patch=check.patch,
-            created_check=check)
+            payload=payload.data,
+            patch=check.patch)
 
     # don't trigger for items loaded from fixtures or existing items
     if raw or not created:
@@ -200,9 +234,13 @@  def create_check_created_event(sender, instance, created, raw, **kwargs):
 def create_series_created_event(sender, instance, created, raw, **kwargs):
 
     def create_event(series):
+        payload = PayloadSerializer(Payload(
+            series=series))
+
         return Event.objects.create(
             category=Event.CATEGORY_SERIES_CREATED,
             project=series.project,
+            payload=payload.data,
             series=series)
 
     # don't trigger for items loaded from fixtures or existing items
@@ -227,9 +265,13 @@  def create_series_completed_event(sender, instance, created, raw, **kwargs):
     # in that case.
 
     def create_event(series):
+        payload = PayloadSerializer(Payload(
+            series=series))
+
         return Event.objects.create(
             category=Event.CATEGORY_SERIES_COMPLETED,
             project=series.project,
+            payload=payload.data,
             series=series)
 
     # don't trigger for items loaded from fixtures or existing items
diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
index 70d563de..39dff6f1 100644
--- a/patchwork/tests/test_events.py
+++ b/patchwork/tests/test_events.py
@@ -17,32 +17,20 @@ 
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+from collections import OrderedDict
+
 from django.test import TestCase
 
 from patchwork.models import Event
 from patchwork.tests import utils
 
-BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
-               'current_delegate']
-
 
 def _get_events(**filters):
     # These are sorted by reverse normally, so reverse it once again
     return Event.objects.filter(**filters).order_by('date')
 
 
-class _BaseTestCase(TestCase):
-
-    def assertEventFields(self, event, parent_type='patch', **fields):
-        for field_name in [x for x in BASE_FIELDS]:
-            field = getattr(event, field_name)
-            if field_name in fields:
-                self.assertEqual(field, fields[field_name])
-            else:
-                self.assertIsNone(field)
-
-
-class PatchCreateTest(_BaseTestCase):
+class PatchCreateTest(TestCase):
 
     def test_patch_created(self):
         """No series, so patch dependencies implicitly exist."""
@@ -51,10 +39,15 @@  class PatchCreateTest(_BaseTestCase):
         # This should raise both the CATEGORY_PATCH_CREATED and
         # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
         events = _get_events(patch=patch)
+
         self.assertEqual(events.count(), 1)
         self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
         self.assertEqual(events[0].project, patch.project)
-        self.assertEventFields(events[0])
+
+        payload = events[0].payload
+        self.assertIsInstance(payload, OrderedDict)
+        self.assertIn('patch', payload)
+        self.assertEqual(payload['patch']['id'], patch.id)
 
     def test_patch_dependencies_present_series(self):
         """Patch dependencies already exist."""
@@ -63,13 +56,22 @@  class PatchCreateTest(_BaseTestCase):
         # This should raise both the CATEGORY_PATCH_CREATED and
         # CATEGORY_PATCH_COMPLETED events
         events = _get_events(patch=series_patch.patch)
+
         self.assertEqual(events.count(), 2)
         self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
         self.assertEqual(events[0].project, series_patch.patch.project)
         self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
         self.assertEqual(events[1].project, series_patch.patch.project)
-        self.assertEventFields(events[0])
-        self.assertEventFields(events[1])
+
+        payload = events[0].payload
+        self.assertIn('patch', payload)
+        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
+
+        payload = events[1].payload
+        self.assertIn('patch', payload)
+        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
+        self.assertIn('series', payload)
+        self.assertEqual(payload['series']['id'], series_patch.series.id)
 
     def test_patch_dependencies_out_of_order(self):
         series = utils.create_series()
@@ -82,7 +84,6 @@  class PatchCreateTest(_BaseTestCase):
             events = _get_events(patch=series_patch.patch)
             self.assertEqual(events.count(), 1)
             self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
-            self.assertEventFields(events[0])
 
         series_patch_1 = utils.create_series_patch(series=series, number=1)
 
@@ -94,8 +95,6 @@  class PatchCreateTest(_BaseTestCase):
             self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
             self.assertEqual(events[1].category,
                              Event.CATEGORY_PATCH_COMPLETED)
-            self.assertEventFields(events[0])
-            self.assertEventFields(events[1])
 
     def test_patch_dependencies_missing(self):
         series_patch = utils.create_series_patch(number=2)
@@ -105,10 +104,9 @@  class PatchCreateTest(_BaseTestCase):
         events = _get_events(patch=series_patch.patch)
         self.assertEqual(events.count(), 1)
         self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
-        self.assertEventFields(events[0])
 
 
-class PatchChangedTest(_BaseTestCase):
+class PatchChangedTest(TestCase):
 
     def test_patch_state_changed(self):
         patch = utils.create_patch()
@@ -119,13 +117,18 @@  class PatchChangedTest(_BaseTestCase):
         patch.save()
 
         events = _get_events(patch=patch)
+
         self.assertEqual(events.count(), 2)
         # we don't care about the CATEGORY_PATCH_CREATED event here
         self.assertEqual(events[1].category,
                          Event.CATEGORY_PATCH_STATE_CHANGED)
         self.assertEqual(events[1].project, patch.project)
-        self.assertEventFields(events[1], previous_state=old_state,
-                               current_state=new_state)
+
+        payload = events[1].payload
+        self.assertIn('previous_state', payload)
+        self.assertEqual(payload['previous_state'], old_state.slug)
+        self.assertIn('current_state', payload)
+        self.assertEqual(payload['current_state'], new_state.slug)
 
     def test_patch_delegated(self):
         patch = utils.create_patch()
@@ -137,12 +140,18 @@  class PatchChangedTest(_BaseTestCase):
         patch.save()
 
         events = _get_events(patch=patch)
+
         self.assertEqual(events.count(), 2)
         # we don't care about the CATEGORY_PATCH_CREATED event here
         self.assertEqual(events[1].category,
                          Event.CATEGORY_PATCH_DELEGATED)
         self.assertEqual(events[1].project, patch.project)
-        self.assertEventFields(events[1], current_delegate=delegate_a)
+
+        payload = events[1].payload
+        self.assertIn('previous_delegate', payload)
+        self.assertIsNone(payload['previous_delegate'])
+        self.assertIn('current_delegate', payload)
+        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
 
         delegate_b = utils.create_user()
 
@@ -152,11 +161,16 @@  class PatchChangedTest(_BaseTestCase):
         patch.save()
 
         events = _get_events(patch=patch)
+
         self.assertEqual(events.count(), 3)
         self.assertEqual(events[2].category,
                          Event.CATEGORY_PATCH_DELEGATED)
-        self.assertEventFields(events[2], previous_delegate=delegate_a,
-                               current_delegate=delegate_b)
+
+        payload = events[2].payload
+        self.assertIn('previous_delegate', payload)
+        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
+        self.assertIn('current_delegate', payload)
+        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
 
         # Delegate B -> None
 
@@ -164,24 +178,36 @@  class PatchChangedTest(_BaseTestCase):
         patch.save()
 
         events = _get_events(patch=patch)
+
         self.assertEqual(events.count(), 4)
         self.assertEqual(events[3].category,
                          Event.CATEGORY_PATCH_DELEGATED)
-        self.assertEventFields(events[3], previous_delegate=delegate_b)
 
+        payload = events[3].payload
+        self.assertIn('previous_delegate', payload)
+        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
+        self.assertIn('current_delegate', payload)
+        self.assertIsNone(payload['current_delegate'])
 
-class CheckCreateTest(_BaseTestCase):
+
+class CheckCreateTest(TestCase):
 
     def test_check_created(self):
         check = utils.create_check()
-        events = _get_events(created_check=check)
-        self.assertEqual(events.count(), 1)
-        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
-        self.assertEqual(events[0].project, check.patch.project)
-        self.assertEventFields(events[0])
+        # we don't care about the CATEGORY_PATCH_CREATED event here
+        events = _get_events(patch=check.patch)
 
+        self.assertEqual(events.count(), 2)
+        # we don't care about the CATEGORY_PATCH_CREATED event here
+        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
+        self.assertEqual(events[1].project, check.patch.project)
+
+        payload = events[1].payload
+        self.assertIn('check', payload)
+        self.assertEqual(payload['check']['id'], check.id)
 
-class CoverCreateTest(_BaseTestCase):
+
+class CoverCreateTest(TestCase):
 
     def test_cover_created(self):
         cover = utils.create_cover()
@@ -189,10 +215,13 @@  class CoverCreateTest(_BaseTestCase):
         self.assertEqual(events.count(), 1)
         self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
         self.assertEqual(events[0].project, cover.project)
-        self.assertEventFields(events[0])
+
+        payload = events[0].payload
+        self.assertIn('cover', payload)
+        self.assertEqual(payload['cover']['id'], cover.id)
 
 
-class SeriesCreateTest(_BaseTestCase):
+class SeriesCreateTest(TestCase):
 
     def test_series_created(self):
         series = utils.create_series()
@@ -200,4 +229,7 @@  class SeriesCreateTest(_BaseTestCase):
         self.assertEqual(events.count(), 1)
         self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
         self.assertEqual(events[0].project, series.project)
-        self.assertEventFields(events[0])
+
+        payload = events[0].payload
+        self.assertIn('series', payload)
+        self.assertEqual(payload['series']['id'], series.id)