From af0d87b569e85879cd71d29821269e6916d98588 Mon Sep 17 00:00:00 2001
From: Will Thompson <will@willthompson.co.uk>
Date: Tue, 19 Apr 2016 10:51:15 +0100
Subject: [PATCH 1/4] Annotation.json: reduce repeated layer lookups

It's actually quite costly to look up keys in CONFIG, particularly
inside a loop: this trims ~5% off get(keys=['layers']) for
annotation-heavy items.
---
 pandora/annotation/models.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index a8ede67d..c274d28e 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -98,7 +98,7 @@ class Annotation(models.Model):
     value = models.TextField()
     findvalue = models.TextField(null=True)
     sortvalue = models.CharField(max_length=1000, null=True, blank=True, db_index=True)
-    
+
     languages = models.CharField(max_length=255, null=True, blank=True)
 
     def editable(self, user):
@@ -138,8 +138,7 @@ class Annotation(models.Model):
 
             findvalue = self.value
             try:
-                l = self.get_layer()
-                if l['type'] == 'entity':
+                if layer['type'] == 'entity':
                     findvalue = self.get_entity().name
             except:
                 pass
@@ -232,7 +231,7 @@ class Annotation(models.Model):
 
     annotation_keys = (
         'id', 'in', 'out', 'value', 'created', 'modified',
-        'duration', 'layer', 'item', 'videoRatio', 'languages', 
+        'duration', 'layer', 'item', 'videoRatio', 'languages',
         'entity', 'event', 'place'
     )
     _clip_keys = ('hue', 'lightness', 'saturation', 'volume')
@@ -295,10 +294,10 @@ class Annotation(models.Model):
                             value = getattr(self.item.sort, key)
                         if value != None:
                             j[key] = value
-        subtitles = get_by_key(settings.CONFIG['layers'], 'isSubtitles', True)
-        if subtitles:
-            if 'id' in j and self.layer == subtitles['id'] and not self.value:
-                del j['id']
+
+        if l.get('isSubtitles') and 'id' in j and not self.value:
+            del j['id']
+
         return j
 
     def __unicode__(self):

From 400b6650a2fbd69ef906360c4f4d893a7d91d93e Mon Sep 17 00:00:00 2001
From: Will Thompson <will@willthompson.co.uk>
Date: Tue, 19 Apr 2016 12:20:49 +0100
Subject: [PATCH 2/4] Annotation.json: document empty-subtitle special case

---
 pandora/annotation/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index c274d28e..24d2871e 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -295,6 +295,9 @@ class Annotation(models.Model):
                         if value != None:
                             j[key] = value
 
+        # Items without any real subtitles are given a dummy 5-second subtitle
+        # every minute to ensure that they have at least *some* clips. Treat
+        # them specially. See Item.add_empty_clips
         if l.get('isSubtitles') and 'id' in j and not self.value:
             del j['id']
 

From aa0fbc9d4a93c90a5af0922d402d65412b6b5229 Mon Sep 17 00:00:00 2001
From: Will Thompson <will@willthompson.co.uk>
Date: Tue, 19 Apr 2016 15:08:55 +0100
Subject: [PATCH 3/4] Entity.json: get document ids from join table

This is a bit quicker because it's just a lookup in a single table, not
a join.
---
 pandora/entity/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandora/entity/models.py b/pandora/entity/models.py
index faff0e25..d515937f 100644
--- a/pandora/entity/models.py
+++ b/pandora/entity/models.py
@@ -168,8 +168,8 @@ class Entity(models.Model):
             elif key == 'sortName':
                 response[key] = self.name_sort
             elif key == 'documents':
-                response[key] = [ox.toAZ(d['id'])
-                    for d in self.documents.all().values('id').order_by('documentproperties__index')]
+                response[key] = [ox.toAZ(id_)
+                    for id_, in self.documentproperties.order_by('index').values_list('document_id')]
             elif key in self.data:
                 response[key] = self.data[key]
         return response

From aa40a405951e9d7c5546b332bddb1b52ce27eda4 Mon Sep 17 00:00:00 2001
From: Will Thompson <will@willthompson.co.uk>
Date: Fri, 15 Apr 2016 15:10:43 +0100
Subject: [PATCH 4/4] Annotation.json: only include entity id & name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fetching documents for each entity in turn is expensive. (I have tried
using ArrayAgg to fetch them in the same query as the Entity — no
improvement. It's possible that being able to join to entity_entity,
and then use ArrayAgg, would be better.)

Even once you've fetched them all, if the same entity appears many
times in an item, then get(..., keys=['layers']) duplicates the whole
JSON for the entity many times: expensive to serialize, expensive to
send over the wire.

Pandora's own web interface only depends on the 'id' key of 'entity' in
each annotation, and refetches the rest of the entity to show the pop-up
dialog when you press E. So by just not bothering to fetch and send any
other keys, get(..., keys=['layers']) on an item with many entity
annotations is substantially faster.

(I experimented with splitting the full entities off to one side, so,
you'd have:

    {
        "layers": {
            somelayer: [...,
              {..., "entity": {"id": ABC}},
            ], ...
        },
        "entities": {
            ABC: {...},
            ...
        }
    }

This is quicker than the status quo, but obviously not as fast as not
fetching & sending the rest at all!)
---
 pandora/annotation/models.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index 24d2871e..51b08770 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -209,19 +209,15 @@ class Annotation(models.Model):
     def _get_entity_json(self, user=None, entity_cache=None):
         """When serializing many annotations pointing to the same entity, it is expensive to
         repeatedly look up and serialize the same entity.
-
-        TODO: if Entity were a (nullable) foreign key of Annotation, we could just:
-
-            prefetch_related('entity', 'entity__user', 'entity__documents')
-
-        before serializing the annotations, which would make self.entity.json(user=user) cheap and
-        all this unnecessary.
         """
+        from entity.models import Entity
+
         if entity_cache is not None and self.value in entity_cache:
             return entity_cache[self.value]
 
-        entity = self.get_entity()
-        entity_json = entity.json(user=user)
+        id = ox.fromAZ(self.value)
+        entity = Entity.objects.filter(id=id).only('name').get()
+        entity_json = entity.json(keys=['id', 'name'])
         value = entity.annotation_value()
 
         if entity_cache is not None: