get documents in chunks

This commit is contained in:
j 2017-01-05 01:44:09 +01:00
parent 69b6e08731
commit ce0e59c9ba

View file

@ -814,11 +814,14 @@ class Client(object):
'range': [o, o+chunk] 'range': [o, o+chunk]
})['data']['items'] if f['extension'] in DOCUMENT_FORMATS] })['data']['items'] if f['extension'] in DOCUMENT_FORMATS]
o += chunk o += chunk
documents = [] missing = list(set((f['id'], f['item']) for f in files))
available = set()
total = len(missing)
ids = [m[0] for m in missing]
o = 0 o = 0
ids = list(set(f['id'] for f in files)) chunk = 1000
while o < len(ids): while o < len(ids):
documents += self.api.findDocuments({ for d in self.api.findDocuments({
'query': { 'query': {
'conditions': [ 'conditions': [
{'key': 'oshash', 'operator': '==', 'value': id} {'key': 'oshash', 'operator': '==', 'value': id}
@ -826,13 +829,12 @@ class Client(object):
], ],
'operator': '|' 'operator': '|'
}, },
'keys': ['id', 'oshash', 'extension'], 'keys': ['oshash'],
'range': [0, chunk] 'range': [0, chunk]
})['data']['items'] })['data']['items']:
available.add(d['oshash'])
o += chunk o += chunk
available = set(f['oshash'] for f in documents) missing = [m for m in missing if m[0] not in available]
missing = [(f['id'], f['item']) for f in files if f['id'] not in available]
missing = list(set(missing))
return missing return missing
def find_document(self, oshash): def find_document(self, oshash):