pandora_client_eightfiftyeight/upload_documents.py

56 lines
1.4 KiB
Python
Raw Normal View History

2017-05-31 11:36:08 +00:00
#!/usr/bin/python3
2016-11-05 15:13:13 +00:00
#
# upload documents
#
from __future__ import print_function
import os
import subprocess
from glob import glob
import json
import ox
import pandora_client
from pandora_client.utils import hash_prefix
MISSING = os.path.expanduser('~/.ox/missing_documents.json')
DONE = os.path.expanduser('~/.ox/documents_uploaded.txt')
def get_documents(client):
    """Return the document list, using the MISSING file as a local cache.

    When the MISSING file already exists, its JSON content is loaded and
    returned without contacting *client*. Otherwise the list is obtained
    from ``client._get_documents()``, written to MISSING as indented JSON,
    and the freshly fetched object is returned.

    NOTE(review): the exact shape of the returned value is defined by
    ``client._get_documents()``, which is not visible here; the caller
    unpacks each entry as ``(oshash, item)``.
    """
    if not os.path.exists(MISSING):
        # No cache yet: ask the client and persist the answer for later runs.
        fetched = client._get_documents()
        with open(MISSING, 'w') as cache_out:
            json.dump(fetched, cache_out, indent=4)
        return fetched
    with open(MISSING) as cache_in:
        return json.load(cache_in)
def done(oshash):
    """Append *oshash* as a single line to the DONE file."""
    with open(DONE, mode='a') as log:
        line = oshash + '\n'
        log.write(line)
2017-05-31 11:36:08 +00:00
2016-11-05 15:13:13 +00:00
if __name__ == '__main__':
    # Hashes recorded in DONE (one per line) are skipped below. A set keeps
    # the per-document membership test O(1) instead of scanning a list, and
    # blank lines in the file are ignored.
    uploaded = set()
    if os.path.exists(DONE):
        with open(DONE) as fd:
            uploaded = {line.strip() for line in fd if line.strip()}

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by pandora_client.Client and is not visible in this file.
    client = pandora_client.Client(os.path.expanduser('~/.ox/client.json'), False)
    documents = get_documents(client)

    # Collect [path, oshash, item] for every document that is not yet marked
    # done and for which one of the client's candidate paths exists locally.
    pending = []
    for oshash, item in documents:
        if oshash in uploaded:
            continue
        for path in client.path(oshash):
            if os.path.exists(path):
                pending.append([path, oshash, item])
                # Only the first existing path is used for each document.
                break

    print('uploading %s documents' % len(pending))
    for path, oshash, item in pending:
        print(path, item)
        client._add_document(path, item)
        # Record the hash only after _add_document returned, so a run that is
        # interrupted by an exception retries this document next time.
        done(oshash)