no upper limit for number of documents

This commit is contained in:
j 2016-10-27 01:11:42 +02:00
parent db3c235e15
commit 86cccee265

View file

@ -784,28 +784,48 @@ class Client(object):
print(r) print(r)
def _get_documents(self): def _get_documents(self):
files = self.api.findMedia({ query = {
'query': { 'conditions': [
'conditions': [ {'key': 'filename', 'operator': '', 'value': value}
{'key': 'filename', 'operator': '', 'value': value} for value in DOCUMENT_FORMATS
for value in DOCUMENT_FORMATS ],
], 'operator': '|'
'operator': '|' }
}, n = self.api.findMedia({'query': query})['data']['items']
'keys': ['item', 'id', 'extension'], if n:
'range': [0, 5000] o = 0
})['data']['items'] chunk = 5000
d = self.api.findDocuments({ files = []
'query': { while o < n:
'conditions': [ files += self.api.findMedia({
{'key': 'oshash', 'operator': '==', 'value': f['id']} 'query': {
for f in files 'conditions': [
], {'key': 'filename', 'operator': '', 'value': value}
'operator': '|' for value in DOCUMENT_FORMATS
}, ],
'keys': ['id', 'oshash', 'extension'], 'operator': '|'
'range': [0, len(files)] },
})['data']['items'] 'keys': ['item', 'id', 'extension'],
'range': [o, o+chunk]
})['data']['items']
o += chunk
d = []
o = 0
while o < len(files):
d += self.api.findDocuments({
'query': {
'conditions': [
{'key': 'oshash', 'operator': '==', 'value': f['id']}
for f in files[o:o+chunk]
],
'operator': '|'
},
'keys': ['id', 'oshash', 'extension'],
'range': [0, chunk]
})['data']['items']
o += chunk
else:
d = []
available = set(f['oshash'] available = set(f['oshash']
for f in d if f['extension'] in DOCUMENT_FORMATS) for f in d if f['extension'] in DOCUMENT_FORMATS)
missing = [(f['id'], f['item']) for f in files missing = [(f['id'], f['item']) for f in files