get documents in chunks

This commit is contained in:
j 2017-01-05 01:44:09 +01:00
parent 69b6e08731
commit ce0e59c9ba

View file

@ -814,11 +814,14 @@ class Client(object):
'range': [o, o+chunk]
})['data']['items'] if f['extension'] in DOCUMENT_FORMATS]
o += chunk
documents = []
missing = list(set((f['id'], f['item']) for f in files))
available = set()
total = len(missing)
ids = [m[0] for m in missing]
o = 0
ids = list(set(f['id'] for f in files))
chunk = 1000
while o < len(ids):
documents += self.api.findDocuments({
for d in self.api.findDocuments({
'query': {
'conditions': [
{'key': 'oshash', 'operator': '==', 'value': id}
@ -826,13 +829,12 @@ class Client(object):
],
'operator': '|'
},
'keys': ['id', 'oshash', 'extension'],
'keys': ['oshash'],
'range': [0, chunk]
})['data']['items']
})['data']['items']:
available.add(d['oshash'])
o += chunk
available = set(f['oshash'] for f in documents)
missing = [(f['id'], f['item']) for f in files if f['id'] not in available]
missing = list(set(missing))
missing = [m for m in missing if m[0] not in available]
return missing
def find_document(self, oshash):