From 92a4e77f9b5ef0693605ae50b7072809b140214e Mon Sep 17 00:00:00 2001
From: Will Thompson
Date: Tue, 28 Jun 2016 15:59:16 +0100
Subject: [PATCH 1/3] chunkupload: typo: uplaod -> upload

---
 static/js/chunkupload.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/static/js/chunkupload.js b/static/js/chunkupload.js
index ba83a096..2d37d807 100644
--- a/static/js/chunkupload.js
+++ b/static/js/chunkupload.js
@@ -89,7 +89,7 @@ pandora.chunkupload = function(options) {
             }
         }, false);
         request.addEventListener('error', function (evt) {
-            that.status = 'uplaod failed';
+            that.status = 'upload failed';
             that.progress = -1;
             that.responseText = evt.target.responseText;
             done();
@@ -100,7 +100,7 @@ pandora.chunkupload = function(options) {
             done();
         }, false);
         var formData = new FormData();
-        
+
         Object.keys(options.data).forEach(function(key) {
             formData.append(key, options.data[key]);
         });
@@ -176,7 +176,7 @@ pandora.chunkupload = function(options) {
                 // failed to upload, try again in 5 second
                 retries++;
                 if (maxRetry > 0 && retries > maxRetry) {
-                    that.status = 'uplaod failed';
+                    that.status = 'upload failed';
                     that.progress = -1;
                     done();
                 } else {
@@ -195,7 +195,7 @@ pandora.chunkupload = function(options) {
                 // failed to upload, try again in 3 second
                 retries++;
                 if (maxRetry > 0 && retries > maxRetry) {
-                    that.status = 'uplaod failed';
+                    that.status = 'upload failed';
                     that.progress = -1;
                     done();
                 } else {

From 8d25e3be78e751f65375ae6cbe1a8001b83f3f32 Mon Sep 17 00:00:00 2001
From: Will Thompson
Date: Tue, 28 Jun 2016 13:11:09 +0000
Subject: [PATCH 2/3] findDocuments: improve entity query performance

When I implemented this in 9a4c24c, there were not many rows in
entity_documentproperties in the database here. Now that there are,
computing the document_document -> entity_documentproperties ->
entity_entity join and then filtering is really, really slow. Postgres
seems to materialize the whole join and then scan it.

If we get a set of matching document IDs for the entity query in a
subquery, and then just filter with IN on that, things are much faster:
scan entity_entity; in a nested loop, get the document_ids via
entity_documentproperties; hash this set; and then scan
document_document.

Searching for a single character, this brings the query from ~1.1s to
~400ms; searching for a full word, from ~800ms to ~120ms.

This condition is getting really ugly -- I am sorry!
References #2935
---
 pandora/document/managers.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandora/document/managers.py b/pandora/document/managers.py
index 2772d63c..f9d26b11 100644
--- a/pandora/document/managers.py
+++ b/pandora/document/managers.py
@@ -35,14 +35,18 @@ def parseCondition(condition, user, item=None):
 
 
 def buildCondition(k, op, v):
+    import entity.models
     if k == 'id':
         v = ox.fromAZ(v)
         return Q(**{k: v})
     if isinstance(v, bool): #featured and public flag
         key = k
     elif k == 'entity':
-        entity_key, v = entity.managers.namePredicate(op, v)
-        key = 'entities__' + entity_key
+        entity_key, entity_v = entity.managers.namePredicate(op, v)
+        key = 'id__in'
+        v = entity.models.DocumentProperties.objects.filter(**{
+            'entity__' + entity_key: entity_v
+        }).values_list('document_id', flat=True)
     else:
         key = "%s%s" % (k, {
             '==': '__iexact',

From 09ebbc9cc6ccf03d3b24c842042ac575749b3abc Mon Sep 17 00:00:00 2001
From: Will Thompson
Date: Tue, 28 Jun 2016 12:36:07 +0000
Subject: [PATCH 3/3] findDocuments: improve performance of positions queries

References #2935
---
 pandora/document/views.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/pandora/document/views.py b/pandora/document/views.py
index 66a01e59..ab739f62 100644
--- a/pandora/document/views.py
+++ b/pandora/document/views.py
@@ -165,6 +165,33 @@ def parse_query(data, user):
     return query
 
 
+def get_positions(qs, query_positions):
+    '''
+    qs: a QuerySet
+    query_positions: a list of AZ ids
+
+    TODO: merge this with item.utils.get_positions. The win is to fetch
+    only the integer IDs and convert the (smaller) set of query_positions to
+    ints, rather than fetch all keys for everything in qs (expected to be many
+    orders of magnitude larger), ignore most of it, and convert those ids to
+    strings.
+
+    Returns:
+        {
+            i: index of i in qs
+            for i in query_positions
+        }
+    '''
+    ids = list(qs.values_list('id', flat=True))
+    ret = {}
+    for i in query_positions:
+        try:
+            ret[i] = ids.index(ox.fromAZ(i))
+        except:
+            pass
+    return ret
+
+
 def findDocuments(request, data):
     '''
     Finds documents for a given query
@@ -198,8 +225,7 @@ def findDocuments(request, data):
         #FIXME: actually implement position requests
         response['data']['position'] = 0
     elif 'positions' in data:
-        ids = [i.get_id() for i in qs]
-        response['data']['positions'] = utils.get_positions(ids, query['positions'])
+        response['data']['positions'] = get_positions(qs, query['positions'])
     else:
         r = qs.aggregate(
             Sum('size')
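
Note on the shape of the change in PATCH 2/3: the entity condition is no
longer expressed as a single filter spanning the document -> properties ->
entity join, but as an IN filter on a lazily evaluated subquery of matching
document ids. The sketch below shows the two query shapes in isolation. It is
illustrative only: it assumes a Document model in document.models (the model
name is not shown in the diff), and uses a plain name__icontains lookup as a
stand-in for whatever entity.managers.namePredicate() actually returns.

    # Sketch only: the Document import path and the name__icontains lookup
    # are assumptions standing in for the real model and namePredicate().
    from document.models import Document
    from entity.models import DocumentProperties

    def slow_documents(term):
        # Old shape: one filter across document_document ->
        # entity_documentproperties -> entity_entity; Postgres tends to
        # materialize the whole join before filtering it.
        return Document.objects.filter(entities__name__icontains=term)

    def fast_documents(term):
        # New shape: resolve the entity predicate to a set of document ids...
        matching_ids = DocumentProperties.objects.filter(
            entity__name__icontains=term
        ).values_list('document_id', flat=True)
        # ...then filter with IN on that set. The queryset is lazy, so this
        # still executes as one SQL query with an IN (SELECT ...) subquery.
        return Document.objects.filter(id__in=matching_ids)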