Cache serialized entities when fetching many annotations

For a scene with ~5600 annotations, of which ~3100 are entities, this
cuts fetching the scene from 12 seconds to 2 seconds.
This commit is contained in:
Will Thompson 2015-09-14 14:08:02 +02:00 committed by j
parent eebb0b5681
commit 8759b569da
3 changed files with 42 additions and 11 deletions

View file

@ -208,13 +208,36 @@ class Annotation(models.Model):
from entity.models import Entity
return Entity.get(self.value)
def _get_entity_json(self, user=None, entity_cache=None):
    """Serialize this annotation's entity, memoizing by annotation value.

    When serializing many annotations pointing to the same entity, it is
    expensive to repeatedly look up and serialize the same entity, so
    callers serializing a batch may pass a shared ``entity_cache`` dict;
    each distinct entity is then looked up and serialized only once.

    Returns an ``(entity_json, annotation_value)`` tuple.

    NOTE(review): on a cache hit every caller receives the *same* dict
    object — safe while callers only read/serialize it; confirm nothing
    mutates ``j['entity']`` downstream.

    TODO: if Entity were a (nullable) foreign key of Annotation, we could just:
        prefetch_related('entity', 'entity__user', 'entity__documents')
    before serializing the annotations, which would make
    self.entity.json(user=user) cheap and all this unnecessary.
    """
    key = self.value
    if entity_cache is not None:
        try:
            return entity_cache[key]
        except KeyError:
            pass
    entity = self.get_entity()
    result = (entity.json(user=user), entity.annotation_value())
    if entity_cache is not None:
        entity_cache[key] = result
    return result
# Field names for annotation serialization — presumably the keys json()
# can emit for an annotation; usage is not visible in this hunk, so
# TODO confirm against the full body of json().
annotation_keys = (
    'id', 'in', 'out', 'value', 'created', 'modified',
    'duration', 'layer', 'item', 'videoRatio', 'languages',
    'entity', 'event', 'place'
)
# NOTE(review): looks like per-clip video/audio properties merged in
# from the associated clip — confirm against the elided serialization code.
_clip_keys = ('hue', 'lightness', 'saturation', 'volume')
def json(self, layer=False, keys=None, user=None):
def json(self, layer=False, keys=None, user=None, entity_cache=None):
j = {
'user': self.user.username,
'id': self.public_id,
@ -232,9 +255,8 @@ class Annotation(models.Model):
l = self.get_layer()
if l['type'] == 'entity':
try:
entity = self.get_entity()
j['entity'] = entity.json(user=user)
j['value'] = entity.annotation_value()
(j['entity'], j['value']) = self._get_entity_json(
user=user, entity_cache=entity_cache)
except:
j['entity'] = {}
elif l['type'] == 'event':

View file

@ -100,7 +100,11 @@ def findAnnotations(request, data):
qs = order_query(query['qs'], query['sort'])
if 'keys' in data:
qs = qs.select_related()[query['range'][0]:query['range'][1]]
response['data']['items'] = [p.json(keys=data['keys']) for p in qs]
entity_cache = {}
response['data']['items'] = [
p.json(keys=data['keys'], entity_cache=entity_cache)
for p in qs
]
elif 'position' in query:
ids = [i.public_id for i in qs]
data['conditions'] = data['conditions'] + {

View file

@ -32,12 +32,14 @@ def get_layers(item, interval=None, user=None):
start, end = interval
qs = qs.filter(start__lt=end, end__gt=start)
entity_cache = {}
for a in qs.order_by('start').select_related('user'):
if a.layer in private:
if a.user == user:
layers[a.layer].append(a.json(user=user))
else:
layers[a.layer].append(a.json(user=user))
if a.layer in private and a.user != user:
continue
layers[a.layer].append(a.json(user=user, entity_cache=entity_cache))
return layers
@ -107,7 +109,10 @@ class MetaClip:
annotations = self.annotations.all()
if qs:
annotations = annotations.filter(qs)
j['annotations'] = [a.json(keys=['value', 'id', 'layer']) for a in annotations]
entity_cache = {}
j['annotations'] = [
a.json(keys=['value', 'id', 'layer'], entity_cache=entity_cache) for a in annotations
]
if 'layers' in keys:
j['layers'] = self.get_layers()
if 'cuts' in keys: