only upload documents once

This commit is contained in:
j 2016-10-29 16:17:06 +02:00
parent 662c8569fb
commit 9e40462d78

View file

@ -808,20 +808,21 @@ class Client(object):
o = 0 o = 0
chunk = 5000 chunk = 5000
while o < n: while o < n:
files += self.api.findMedia({ files += [f for f in self.api.findMedia({
'query': query, 'query': query,
'keys': ['item', 'id', 'extension'], 'keys': ['item', 'id', 'extension'],
'range': [o, o+chunk] 'range': [o, o+chunk]
})['data']['items'] })['data']['items'] if f['extension'] in DOCUMENT_FORMATS]
o += chunk o += chunk
d = [] documents = []
o = 0 o = 0
while o < len(files): ids = list(set(f['id'] for f in files))
d += self.api.findDocuments({ while o < len(ids):
documents += self.api.findDocuments({
'query': { 'query': {
'conditions': [ 'conditions': [
{'key': 'oshash', 'operator': '==', 'value': f['id']} {'key': 'oshash', 'operator': '==', 'value': id}
for f in files[o:o+chunk] for id in ids[o:o+chunk]
], ],
'operator': '|' 'operator': '|'
}, },
@ -829,10 +830,9 @@ class Client(object):
'range': [0, chunk] 'range': [0, chunk]
})['data']['items'] })['data']['items']
o += chunk o += chunk
available = set(f['oshash'] available = set(f['oshash'] for f in documents)
for f in d if f['extension'] in DOCUMENT_FORMATS) missing = [(f['id'], f['item']) for f in files if f['id'] not in available]
missing = [(f['id'], f['item']) for f in files missing = list(set(missing))
if f['id'] not in available and f['extension'] in DOCUMENT_FORMATS]
return missing return missing
def find_document(self, oshash): def find_document(self, oshash):
@ -867,7 +867,7 @@ class Client(object):
'id': did, 'id': did,
'item': item 'item': item
}) })
return True return did
def upload_document(self, args): def upload_document(self, args):
if not self.user: if not self.user: