no upper limit for number of documents

This commit is contained in:
j 2016-10-27 01:11:42 +02:00
parent db3c235e15
commit 86cccee265
1 changed file with 42 additions and 22 deletions

View File

@@ -784,28 +784,48 @@ class Client(object):
print(r)
def _get_documents(self):
files = self.api.findMedia({
'query': {
'conditions': [
{'key': 'filename', 'operator': '', 'value': value}
for value in DOCUMENT_FORMATS
],
'operator': '|'
},
'keys': ['item', 'id', 'extension'],
'range': [0, 5000]
})['data']['items']
d = self.api.findDocuments({
'query': {
'conditions': [
{'key': 'oshash', 'operator': '==', 'value': f['id']}
for f in files
],
'operator': '|'
},
'keys': ['id', 'oshash', 'extension'],
'range': [0, len(files)]
})['data']['items']
query = {
'conditions': [
{'key': 'filename', 'operator': '', 'value': value}
for value in DOCUMENT_FORMATS
],
'operator': '|'
}
n = self.api.findMedia({'query': query})['data']['items']
if n:
o = 0
chunk = 5000
files = []
while o < n:
files += self.api.findMedia({
'query': {
'conditions': [
{'key': 'filename', 'operator': '', 'value': value}
for value in DOCUMENT_FORMATS
],
'operator': '|'
},
'keys': ['item', 'id', 'extension'],
'range': [o, o+chunk]
})['data']['items']
o += chunk
d = []
o = 0
while o < len(files):
d += self.api.findDocuments({
'query': {
'conditions': [
{'key': 'oshash', 'operator': '==', 'value': f['id']}
for f in files[o:o+chunk]
],
'operator': '|'
},
'keys': ['id', 'oshash', 'extension'],
'range': [0, chunk]
})['data']['items']
o += chunk
else:
d = []
available = set(f['oshash']
for f in d if f['extension'] in DOCUMENT_FORMATS)
missing = [(f['id'], f['item']) for f in files