48 lines
1.2 KiB
Python
Executable file
48 lines
1.2 KiB
Python
Executable file
#!/usr/bin/python3
|
|
import sys
|
|
import re
|
|
import ox.api
|
|
|
|
# A four-digit number delimited by single spaces; the group captures only the
# digits so the surrounding spaces never end up in the stored date.
YEAR_PATTERN = re.compile(r' (\d{4}) ')


def document_edits(document):
    """Return the field changes needed to clean up one document.

    The title is cleaned by dropping every '_ocr' marker, turning the
    remaining underscores into spaces and stripping outer whitespace.
    If the document has no date yet and its original title contains a
    space-delimited four-digit number starting with 1 or 2, that number
    is used as the date. Finally the date (existing or newly found) is
    removed from the title, unless that would leave the title empty.

    Args:
        document: mapping with a 'title' string and an optional 'date'.
            The mapping itself is not modified.

    Returns:
        A dict holding only the changed keys ('title' and/or 'date');
        empty when nothing needs to change.
    """
    original_title = document['title']
    edit = {}

    title = original_title.replace('_ocr', '').replace('_', ' ').strip()
    if title != original_title:
        edit['title'] = title

    date = document.get('date')
    if not date:
        years = YEAR_PATTERN.findall(original_title)
        # Only the first candidate is considered, and only if it looks like
        # a year (1xxx or 2xxx). The previous code compared the whole match,
        # spaces included, against '1'/'2', which could never succeed.
        if years and years[0][0] in ('1', '2'):
            date = edit['date'] = years[0]

    if date and date in title:
        # NOTE: this can leave a double space where the date used to be.
        without_date = title.replace(date, '').strip()
        if without_date:
            edit['title'] = without_date

    return edit


def main():
    """Clean up titles and dates of all documents in one collection.

    Expects exactly two command line arguments, <site> and <collection>;
    prints a usage line and exits with status 1 otherwise. Every document
    for which document_edits() reports a change is printed and updated
    through the site API.
    """
    if len(sys.argv) != 3:
        print('usage: %s <site> <collection>' % sys.argv[0])
        sys.exit(1)

    site, collection = sys.argv[1:3]
    api = ox.api.signin(site)

    # Only the first 10000 matches (sorted by title) are requested.
    found = api.findDocuments({
        'query': {
            'conditions': [{'key': 'collection', 'value': collection, 'operator': '=='}]
        },
        'keys': ['id', 'title'],
        'sort': [{'key': 'title', 'operator': '+'}],
        'range': [0, 10000]
    })['data']['items']

    for d in found:
        # The search result only carries id and title; fetch the document
        # itself so an existing 'date' can be taken into account.
        document = api.getDocument({
            'id': d['id']
        })['data']

        edit = document_edits(document)
        if edit:
            edit['id'] = d['id']
            print('update', d['id'], d['title'])
            api.editDocument(edit)


if __name__ == '__main__':
    main()