From af0d87b569e85879cd71d29821269e6916d98588 Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Tue, 19 Apr 2016 10:51:15 +0100 Subject: [PATCH 1/4] Annotation.json: reduce repeated layer lookups It's actually quite costly to look up keys in CONFIG, particularly inside a loop: this trims ~5% off get(keys=['layers']) for annotation-heavy items. --- pandora/annotation/models.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index a8ede67d..c274d28e 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -98,7 +98,7 @@ class Annotation(models.Model): value = models.TextField() findvalue = models.TextField(null=True) sortvalue = models.CharField(max_length=1000, null=True, blank=True, db_index=True) - + languages = models.CharField(max_length=255, null=True, blank=True) def editable(self, user): @@ -138,8 +138,7 @@ class Annotation(models.Model): findvalue = self.value try: - l = self.get_layer() - if l['type'] == 'entity': + if layer['type'] == 'entity': findvalue = self.get_entity().name except: pass @@ -232,7 +231,7 @@ class Annotation(models.Model): annotation_keys = ( 'id', 'in', 'out', 'value', 'created', 'modified', - 'duration', 'layer', 'item', 'videoRatio', 'languages', + 'duration', 'layer', 'item', 'videoRatio', 'languages', 'entity', 'event', 'place' ) _clip_keys = ('hue', 'lightness', 'saturation', 'volume') @@ -295,10 +294,10 @@ class Annotation(models.Model): value = getattr(self.item.sort, key) if value != None: j[key] = value - subtitles = get_by_key(settings.CONFIG['layers'], 'isSubtitles', True) - if subtitles: - if 'id' in j and self.layer == subtitles['id'] and not self.value: - del j['id'] + + if l.get('isSubtitles') and 'id' in j and not self.value: + del j['id'] + return j def __unicode__(self): From 400b6650a2fbd69ef906360c4f4d893a7d91d93e Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Tue, 19 Apr 2016 12:20:49 +0100 Subject: [PATCH 2/4] Annotation.json: document empty-subtitle special case --- pandora/annotation/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index c274d28e..24d2871e 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -295,6 +295,9 @@ class Annotation(models.Model): if value != None: j[key] = value + # Items without any real subtitles are given a dummy 5-second subtitle + # every minute to ensure that they have at least *some* clips. Treat + # them specially. See Item.add_empty_clips if l.get('isSubtitles') and 'id' in j and not self.value: del j['id'] From aa0fbc9d4a93c90a5af0922d402d65412b6b5229 Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Tue, 19 Apr 2016 15:08:55 +0100 Subject: [PATCH 3/4] Entity.json: get document ids from join table This is a bit quicker because it's just a lookup in a single table, not a join. --- pandora/entity/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandora/entity/models.py b/pandora/entity/models.py index faff0e25..d515937f 100644 --- a/pandora/entity/models.py +++ b/pandora/entity/models.py @@ -168,8 +168,8 @@ class Entity(models.Model): elif key == 'sortName': response[key] = self.name_sort elif key == 'documents': - response[key] = [ox.toAZ(d['id']) - for d in self.documents.all().values('id').order_by('documentproperties__index')] + response[key] = [ox.toAZ(id_) + for id_, in self.documentproperties.order_by('index').values_list('document_id')] elif key in self.data: response[key] = self.data[key] return response From aa40a405951e9d7c5546b332bddb1b52ce27eda4 Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Fri, 15 Apr 2016 15:10:43 +0100 Subject: [PATCH 4/4] Annotation.json: only include entity id & name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetching documents for each entity in turn is expensive. (I have tried using ArrayAgg to fetch them in the same query as the Entity — no improvement. It's possible that being able to join to entity_entity, and then use ArrayAgg, would be better.) Even once you've fetched them all, if the same entity appears many times in an item, then get(..., keys=['layers']) duplicates the whole JSON for the entity many times: expensive to serialize, expensive to send over the wire. Pandora's own web interface only depends on the 'id' key of 'entity' in each annotation, and refetches the rest of the entity to show the pop-up dialog when you press E. So by just not bothering to fetch and send any other keys, get(..., keys=['layers']) on an item with many entity annotations is substantially faster. (I experimented with splitting the full entities off to one side, so, you'd have: { "layers": { somelayer: [..., {..., "entity": {"id": ABC}}, ], ... }, "entities": { ABC: {...}, ... } } This is quicker than the status quo, but obviously not as fast as not fetching & sending the rest at all!) --- pandora/annotation/models.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 24d2871e..51b08770 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -209,19 +209,15 @@ class Annotation(models.Model): def _get_entity_json(self, user=None, entity_cache=None): """When serializing many annotations pointing to the same entity, it is expensive to repeatedly look up and serialize the same entity. - - TODO: if Entity were a (nullable) foreign key of Annotation, we could just: - - prefetch_related('entity', 'entity__user', 'entity__documents') - - before serializing the annotations, which would make self.entity.json(user=user) cheap and - all this unnecessary. """ + from entity.models import Entity + if entity_cache is not None and self.value in entity_cache: return entity_cache[self.value] - entity = self.get_entity() - entity_json = entity.json(user=user) + id = ox.fromAZ(self.value) + entity = Entity.objects.filter(id=id).only('name').get() + entity_json = entity.json(keys=['id', 'name']) value = entity.annotation_value() if entity_cache is not None: