From aa40a405951e9d7c5546b332bddb1b52ce27eda4 Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Fri, 15 Apr 2016 15:10:43 +0100 Subject: [PATCH] Annotation.json: only include entity id & name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetching documents for each entity in turn is expensive. (I have tried using ArrayAgg to fetch them in the same query as the Entity — no improvement. It's possible that being able to join to entity_entity, and then use ArrayAgg, would be better.) Even once you've fetched them all, if the same entity appears many times in an item, then get(..., keys=['layers']) duplicates the whole JSON for the entity many times: expensive to serialize, expensive to send over the wire. Pandora's own web interface only depends on the 'id' key of 'entity' in each annotation, and refetches the rest of the entity to show the pop-up dialog when you press E. So by just not bothering to fetch and send any other keys, get(..., keys=['layers']) on an item with many entity annotations is substantially faster. (I experimented with splitting the full entities off to one side, so, you'd have: { "layers": { somelayer: [..., {..., "entity": {"id": ABC}}, ], ... }, "entities": { ABC: {...}, ... } } This is quicker than the status quo, but obviously not as fast as not fetching & sending the rest at all!) --- pandora/annotation/models.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 24d2871e..51b08770 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -209,19 +209,15 @@ class Annotation(models.Model): def _get_entity_json(self, user=None, entity_cache=None): """When serializing many annotations pointing to the same entity, it is expensive to repeatedly look up and serialize the same entity. - - TODO: if Entity were a (nullable) foreign key of Annotation, we could just: - - prefetch_related('entity', 'entity__user', 'entity__documents') - - before serializing the annotations, which would make self.entity.json(user=user) cheap and - all this unnecessary. """ + from entity.models import Entity + if entity_cache is not None and self.value in entity_cache: return entity_cache[self.value] - entity = self.get_entity() - entity_json = entity.json(user=user) + id = ox.fromAZ(self.value) + entity = Entity.objects.filter(id=id).only('name').get() + entity_json = entity.json(keys=['id', 'name']) value = entity.annotation_value() if entity_cache is not None: