Cache serialized entities when fetching many annotations

For a scene with ~5600 annotations, of which ~3100 are entities, this
cuts fetching the scene from 12 seconds to 2 seconds.
This commit is contained in:
Will Thompson 2015-09-14 14:08:02 +02:00 committed by j
parent eebb0b5681
commit 8759b569da
3 changed files with 42 additions and 11 deletions

View file

@ -208,13 +208,36 @@ class Annotation(models.Model):
from entity.models import Entity
return Entity.get(self.value)
def _get_entity_json(self, user=None, entity_cache=None):
    """Serialize this annotation's entity, memoizing by annotation value.

    When serializing many annotations pointing to the same entity, it is
    expensive to repeatedly look up and serialize the same entity, so
    callers serializing a batch may pass a shared ``entity_cache`` dict;
    each distinct entity is then looked up and serialized only once.

    Returns an ``(entity_json, annotation_value)`` tuple.

    NOTE(review): on a cache hit every caller receives the *same* dict
    object — safe while callers only read/serialize it; confirm nothing
    mutates ``j['entity']`` downstream.

    TODO: if Entity were a (nullable) foreign key of Annotation, we could just:
        prefetch_related('entity', 'entity__user', 'entity__documents')
    before serializing the annotations, which would make
    self.entity.json(user=user) cheap and all this unnecessary.
    """
    key = self.value
    if entity_cache is not None:
        try:
            return entity_cache[key]
        except KeyError:
            pass
    entity = self.get_entity()
    result = (entity.json(user=user), entity.annotation_value())
    if entity_cache is not None:
        entity_cache[key] = result
    return result
# Field names for annotation serialization — presumably the keys json()
# can emit for an annotation; usage is not visible in this hunk, so
# TODO confirm against the full body of json().
annotation_keys = (
    'id', 'in', 'out', 'value', 'created', 'modified',
    'duration', 'layer', 'item', 'videoRatio', 'languages',
    'entity', 'event', 'place'
)
# NOTE(review): looks like per-clip video/audio properties merged in
# from the associated clip — confirm against the elided serialization code.
_clip_keys = ('hue', 'lightness', 'saturation', 'volume')
def json(self, layer=False, keys=None, user=None):
def json(self, layer=False, keys=None, user=None, entity_cache=None):
j = {
'user': self.user.username,
'id': self.public_id,
@ -232,9 +255,8 @@ class Annotation(models.Model):
l = self.get_layer()
if l['type'] == 'entity':
try:
entity = self.get_entity()
j['entity'] = entity.json(user=user)
j['value'] = entity.annotation_value()
(j['entity'], j['value']) = self._get_entity_json(
user=user, entity_cache=entity_cache)
except:
j['entity'] = {}
elif l['type'] == 'event':

View file

@ -100,7 +100,11 @@ def findAnnotations(request, data):
qs = order_query(query['qs'], query['sort'])
if 'keys' in data:
qs = qs.select_related()[query['range'][0]:query['range'][1]]
response['data']['items'] = [p.json(keys=data['keys']) for p in qs]
entity_cache = {}
response['data']['items'] = [
p.json(keys=data['keys'], entity_cache=entity_cache)
for p in qs
]
elif 'position' in query:
ids = [i.public_id for i in qs]
data['conditions'] = data['conditions'] + {

View file

@ -32,12 +32,14 @@ def get_layers(item, interval=None, user=None):
start, end = interval
qs = qs.filter(start__lt=end, end__gt=start)
entity_cache = {}
for a in qs.order_by('start').select_related('user'):
if a.layer in private:
if a.user == user:
layers[a.layer].append(a.json(user=user))
else:
layers[a.layer].append(a.json(user=user))
if a.layer in private and a.user != user:
continue
layers[a.layer].append(a.json(user=user, entity_cache=entity_cache))
return layers
@ -107,7 +109,10 @@ class MetaClip:
annotations = self.annotations.all()
if qs:
annotations = annotations.filter(qs)
j['annotations'] = [a.json(keys=['value', 'id', 'layer']) for a in annotations]
entity_cache = {}
j['annotations'] = [
a.json(keys=['value', 'id', 'layer'], entity_cache=entity_cache) for a in annotations
]
if 'layers' in keys:
j['layers'] = self.get_layers()
if 'cuts' in keys: