From 8d25e3be78e751f65375ae6cbe1a8001b83f3f32 Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Tue, 28 Jun 2016 13:11:09 +0000 Subject: [PATCH] findDocuments: improve entity query performance When I implemented this in 9a4c24c, there were not many rows in entity_documentproperties in the database here. Now that there are, computing the document_document -> entity_documentproperties -> entity_entity join and then filtering is really, really slow. Postgres seems to materialize the whole join and then scan it. If we get a set of matching document IDs for the entity query in a subquery, and then just filter with IN on that, things are much faster: scan entity_entity; in a nested loop, get the document_ids via entity_documentproperties; hash this set; and then scan document_document. Searching for a single character, this brings the query from ~1.1s to ~400ms. Searching for a full word, it goes from ~800ms to ~120ms. This condition is getting really ugly -- I am sorry! References #2935 --- pandora/document/managers.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandora/document/managers.py b/pandora/document/managers.py index 2772d63c9..f9d26b11f 100644 --- a/pandora/document/managers.py +++ b/pandora/document/managers.py @@ -35,14 +35,18 @@ def parseCondition(condition, user, item=None): def buildCondition(k, op, v): + import entity.models if k == 'id': v = ox.fromAZ(v) return Q(**{k: v}) if isinstance(v, bool): #featured and public flag key = k elif k == 'entity': - entity_key, v = entity.managers.namePredicate(op, v) - key = 'entities__' + entity_key + entity_key, entity_v = entity.managers.namePredicate(op, v) + key = 'id__in' + v = entity.models.DocumentProperties.objects.filter(**{ + 'entity__' + entity_key: entity_v + }).values_list('document_id', flat=True) else: key = "%s%s" % (k, { '==': '__iexact',