From 8d25e3be78e751f65375ae6cbe1a8001b83f3f32 Mon Sep 17 00:00:00 2001
From: Will Thompson <will@willthompson.co.uk>
Date: Tue, 28 Jun 2016 13:11:09 +0000
Subject: [PATCH] findDocuments: improve entity query performance

When I implemented this in 9a4c24c, there were not many rows in
entity_documentproperties in the database here. Now that there are,
computing the document_document -> entity_documentproperties ->
entity_entity join and then filtering is really, really slow. Postgres
seems to materialize the whole join and then scan it.

If we instead fetch the set of matching document IDs for the entity
query in a subquery, and then just filter with IN on that, things are
much faster. The resulting plan is: scan entity_entity; in a nested
loop, get the document_ids via entity_documentproperties; hash this
set; and then scan document_document.

Searching for a single character, this brings the query from ~1.1s to
~400ms. Searching for a full word, it goes from ~800ms to ~120ms.

This condition is getting really ugly -- I am sorry!

References #2935
---
 pandora/document/managers.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandora/document/managers.py b/pandora/document/managers.py
index 2772d63c9..f9d26b11f 100644
--- a/pandora/document/managers.py
+++ b/pandora/document/managers.py
@@ -35,14 +35,18 @@ def parseCondition(condition, user, item=None):
 
 
 def buildCondition(k, op, v):
+    import entity.models
     if k == 'id':
         v = ox.fromAZ(v)
         return Q(**{k: v})
     if isinstance(v, bool): #featured and public flag
         key = k
     elif k == 'entity':
-        entity_key, v = entity.managers.namePredicate(op, v)
-        key = 'entities__' + entity_key
+        entity_key, entity_v = entity.managers.namePredicate(op, v)
+        key = 'id__in'
+        v = entity.models.DocumentProperties.objects.filter(**{
+            'entity__' + entity_key: entity_v
+        }).values_list('document_id', flat=True)
     else:
         key = "%s%s" % (k, {
             '==': '__iexact',