helper scripts to import documents

This commit is contained in:
j 2016-11-05 16:13:13 +01:00
parent 8f86180f8f
commit 314e465588
2 changed files with 134 additions and 0 deletions

81
import_raw.py Executable file
View file

@ -0,0 +1,81 @@
#!/usr/bin/python
#
# apt install imagemagick ufraw-batch   (imagemagick provides the `convert` command)
#
from __future__ import print_function
import os
import subprocess
from glob import glob
import ox
import pandora_client
from pandora_client.utils import hash_prefix
def convert_raw(raw, jpg):
    """Convert the raw image ``raw`` to ``jpg`` with the external ``convert`` command.

    The directory that will contain ``jpg`` is created first through
    ``ox.makedirs``. The command line is echoed to stdout before it is run,
    and the command's exit status is not inspected.
    """
    target_dir = os.path.dirname(jpg)
    ox.makedirs(target_dir)
    command = ['convert', raw, jpg]
    print(' '.join(command))
    subprocess.call(command)
def get_raw(client, chunk=5000):
    """Return the raw image files (cr2/nef) known for the client's active volumes.

    For every volume returned by ``client.active_volumes()`` the server is
    first asked through ``findMedia`` how many files match the query, then the
    matches are fetched in pages of ``chunk`` entries.

    Args:
        client: object providing ``active_volumes()`` and ``api.findMedia()``.
        chunk: number of results requested per ``findMedia`` call.

    Returns:
        A list of the dicts returned by the server (requested keys: 'item',
        'id', 'extension') whose extension is ``cr2`` or ``nef`` in any letter
        case. Entries with a missing or empty extension are skipped.
    """
    raw_extensions = ('cr2', 'nef')
    files = []
    for volume in client.active_volumes():
        query = {
            'conditions': [
                {'key': 'list', 'value': volume, 'operator': '=='},
                {
                    'conditions': [
                        {'key': 'filename', 'value': '.cr2', 'operator': ''},
                        {'key': 'filename', 'value': '.nef', 'operator': ''},
                    ],
                    'operator': '|'
                },
            ],
            'operator': '&'
        }
        # Without 'keys'/'range' the 'items' field is used as the match count.
        total = client.api.findMedia({'query': query})['data']['items']
        for offset in range(0, total or 0, chunk):
            files += client.api.findMedia({
                'query': query,
                'keys': ['item', 'id', 'extension'],
                'range': [offset, offset + chunk]
            })['data']['items']
    # NOTE(review): the filename conditions presumably match substrings, so
    # other files may slip through - confirm. The real extension decides here.
    # A missing or None extension is treated as "not raw" instead of raising.
    return [
        f for f in files
        if (f.get('extension') or '').lower() in raw_extensions
    ]
def import_raw(client):
    """Attach a JPEG rendition of every raw file to its item as a document.

    For each file returned by ``get_raw`` the file's media-cache directory is
    searched for an existing ``*.jpg``. If there is none, the first local copy
    of the raw file that exists on disk is converted with ``convert_raw``.
    The JPEG is then added to the item through ``client._add_document`` and,
    when ``client.find_document`` locates the resulting document, its
    description is set to name the file it was converted from.

    Args:
        client: object providing ``media_cache()``, ``path(oshash)``,
            ``_add_document()``, ``find_document()`` and ``api.editDocument()``.
    """
    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is already text.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    files = get_raw(client)
    print('got', len(files), 'raw files')
    for f in files:
        oshash = f['id']
        print(oshash)
        cache = os.path.join(client.media_cache(), os.path.join(*hash_prefix(oshash)))
        cache = text_type(cache)

        # Resolve the local source once per file so the description written
        # below never relies on a loop variable left over from another file.
        paths = list(client.path(oshash))
        source = next((p for p in paths if os.path.exists(p)), None)

        cached = glob(u'%s/*.jpg' % cache)
        if cached:
            jpg = cached[0]
        elif source:
            jpg = os.path.join(cache, u'%s.jpg' % os.path.basename(source).split('.')[0])
            convert_raw(source, jpg)
        else:
            jpg = None

        # The conversion result is not checked, so verify the file exists.
        if jpg and os.path.exists(jpg):
            client._add_document(jpg, f['item'])
            doc = client.find_document(ox.oshash(jpg))
            if doc:
                # Prefer the raw file found on disk; fall back to any known
                # path, then to the JPEG itself, to name the origin.
                origin = source or (paths[0] if paths else jpg)
                client.api.editDocument({
                    'id': doc,
                    'description': 'Converted from %s [%s]' % (os.path.basename(origin), oshash)
                })
            print('added', oshash, 'to', f['item'])
if __name__ == '__main__':
    # Script entry point: build a client from the per-user config file and
    # import all raw files it knows about.
    # NOTE(review): the meaning of the second argument (False) is defined by
    # pandora_client.Client - confirm against that class.
    config_path = os.path.expanduser('~/.ox/client.json')
    client = pandora_client.Client(config_path, False)
    import_raw(client)

53
upload_documents.py Executable file
View file

@ -0,0 +1,53 @@
#!/usr/bin/python
#
# upload documents
#
from __future__ import print_function
import os
import subprocess
from glob import glob
import json
import ox
import pandora_client
from pandora_client.utils import hash_prefix
MISSING = os.path.expanduser('~/.ox/missing_documents.json')
DONE = os.path.expanduser('~/.ox/documents_uploaded.txt')
def get_documents(client):
    """Return the document list, cached on disk in the MISSING file.

    When MISSING exists its JSON content is returned. Otherwise the list is
    obtained from ``client._get_documents()``, written to MISSING as indented
    JSON, and returned.
    """
    if not os.path.exists(MISSING):
        fetched = client._get_documents()
        with open(MISSING, 'w') as out:
            json.dump(fetched, out, indent=4)
        return fetched
    with open(MISSING) as cached:
        return json.load(cached)
def done(oshash):
    """Append ``oshash`` as one line to the DONE file."""
    with open(DONE, 'a') as log:
        entry = oshash + '\n'
        log.write(entry)
if __name__ == '__main__':
    # Hashes recorded in DONE, one per line. A set keeps the membership test
    # below O(1) per document; blank lines and stray whitespace are ignored.
    uploaded = set()
    if os.path.exists(DONE):
        with open(DONE) as fd:
            uploaded = set(line.strip() for line in fd if line.strip())

    client = pandora_client.Client(os.path.expanduser('~/.ox/client.json'), False)
    documents = get_documents(client)

    # Keep only documents not yet uploaded that have a local copy on disk;
    # the first existing path for each hash is used.
    _documents = []
    for oshash, item in documents:
        if oshash in uploaded:
            continue
        for path in client.path(oshash):
            if os.path.exists(path):
                _documents.append([path, oshash, item])
                break

    print('uploading %s documents' % len(_documents))
    for path, oshash, item in _documents:
        client._add_document(path, item)
        # Record progress right after each upload so a rerun skips this hash.
        done(oshash)