From 8759b569dac05418783ecb5c838af6e4b397f85f Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Mon, 14 Sep 2015 14:08:02 +0200 Subject: [PATCH] Cache serialized entities when fetching many annotations For a scene with ~5600 annotations, of which ~3100 are entities, this cuts fetching the scene from 12 seconds to 2 seconds. --- pandora/annotation/models.py | 30 ++++++++++++++++++++++++++---- pandora/annotation/views.py | 6 +++++- pandora/clip/models.py | 17 +++++++++++------ 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 556f44d6b..4c951b7a3 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -208,13 +208,36 @@ class Annotation(models.Model): from entity.models import Entity return Entity.get(self.value) + def _get_entity_json(self, user=None, entity_cache=None): + """When serializing many annotations pointing to the same entity, it is expensive to + repeatedly look up and serialize the same entity. + + TODO: if Entity were a (nullable) foreign key of Annotation, we could just: + + prefetch_related('entity', 'entity__user', 'entity__documents') + + before serializing the annotations, which would make self.entity.json(user=user) cheap and + all this unnecessary. + """ + if entity_cache is not None and self.value in entity_cache: + return entity_cache[self.value] + + entity = self.get_entity() + entity_json = entity.json(user=user) + value = entity.annotation_value() + + if entity_cache is not None: + entity_cache[self.value] = (entity_json, value) + + return (entity_json, value) + annotation_keys = ( 'id', 'in', 'out', 'value', 'created', 'modified', 'duration', 'layer', 'item', 'videoRatio', 'languages', 'entity', 'event', 'place' ) _clip_keys = ('hue', 'lightness', 'saturation', 'volume') - def json(self, layer=False, keys=None, user=None): + def json(self, layer=False, keys=None, user=None, entity_cache=None): j = { 'user': self.user.username, 'id': self.public_id, @@ -232,9 +255,8 @@ class Annotation(models.Model): l = self.get_layer() if l['type'] == 'entity': try: - entity = self.get_entity() - j['entity'] = entity.json(user=user) - j['value'] = entity.annotation_value() + (j['entity'], j['value']) = self._get_entity_json( + user=user, entity_cache=entity_cache) except: j['entity'] = {} elif l['type'] == 'event': diff --git a/pandora/annotation/views.py b/pandora/annotation/views.py index f11cc143b..9def1a967 100644 --- a/pandora/annotation/views.py +++ b/pandora/annotation/views.py @@ -100,7 +100,11 @@ def findAnnotations(request, data): qs = order_query(query['qs'], query['sort']) if 'keys' in data: qs = qs.select_related()[query['range'][0]:query['range'][1]] - response['data']['items'] = [p.json(keys=data['keys']) for p in qs] + entity_cache = {} + response['data']['items'] = [ + p.json(keys=data['keys'], entity_cache=entity_cache) + for p in qs + ] elif 'position' in query: ids = [i.public_id for i in qs] data['conditions'] = data['conditions'] + { diff --git a/pandora/clip/models.py b/pandora/clip/models.py index 57858f3e3..94b319cc0 100644 --- a/pandora/clip/models.py +++ b/pandora/clip/models.py @@ -32,12 +32,14 @@ def get_layers(item, interval=None, user=None): start, end = interval qs = qs.filter(start__lt=end, end__gt=start) + entity_cache = {} + for a in qs.order_by('start').select_related('user'): - if a.layer in private: - if a.user == user: - layers[a.layer].append(a.json(user=user)) - else: - layers[a.layer].append(a.json(user=user)) + if a.layer in private and a.user != user: + continue + + layers[a.layer].append(a.json(user=user, entity_cache=entity_cache)) + return layers @@ -107,7 +109,10 @@ class MetaClip: annotations = self.annotations.all() if qs: annotations = annotations.filter(qs) - j['annotations'] = [a.json(keys=['value', 'id', 'layer']) for a in annotations] + entity_cache = {} + j['annotations'] = [ + a.json(keys=['value', 'id', 'layer'], entity_cache=entity_cache) for a in annotations + ] if 'layers' in keys: j['layers'] = self.get_layers() if 'cuts' in keys: