#!/usr/bin/python3
"""Clean up document titles in one collection and fill in missing dates.

Usage: SCRIPT SITE COLLECTION

For every document of COLLECTION on SITE the script removes '_ocr' and
underscores from the title, takes a four-digit year found in the title as
the document's date when no date is set, removes the date from the title,
and sends the resulting changes with ``editDocument``.
"""
import re
import sys

# Four digits preceded by a space and followed by a space or the end of the
# string.  Written as a raw string so that "\d" reaches the regex engine
# instead of being an invalid string escape; compiled once, not per document.
YEAR_RE = re.compile(r' (\d{4})( |$)')


def build_edit(document):
    """Return the fields of *document* that should be changed.

    *document* is a mapping with a 'title' string and an optional 'date'.
    The result is a dict holding a new 'title' and/or 'date'; it is empty
    when nothing needs to change.  *document* itself is not modified.
    """
    edit = {}
    stored_title = document['title']

    # Drop the '_ocr' marker first, then turn remaining underscores into spaces.
    title = stored_title.replace('_ocr', '').replace('_', ' ').strip()
    if title != stored_title:
        edit['title'] = title

    date = document.get('date')

    # NOTE(review): the year is searched in the stored title, not the cleaned
    # one, and the pattern needs a space before the digits.  A year delimited
    # only by underscores is therefore not found until a later run, after the
    # cleaned title has been saved.  Kept as is; confirm whether intended.
    years = YEAR_RE.findall(stored_title)
    if years and years[0][0][0] in ('1', '2') and not date:
        # Only the first match is used, and only if it starts with 1 or 2.
        date = edit['date'] = years[0][0]

    if date and date in title:
        title = title.replace(date, '').strip()
        # Never replace the title with an empty string.
        if title:
            edit['title'] = title

    return edit


def main():
    """Apply build_edit() to every document of the collection named on the command line."""
    if len(sys.argv) != 3:
        print('usage: %s SITE COLLECTION' % sys.argv[0])
        sys.exit(1)

    # Imported here so that build_edit() can be imported and tested without
    # the ox package being installed.
    import ox.api

    site = sys.argv[1]
    collection = sys.argv[2]

    api = ox.api.signin(site)

    # NOTE(review): 'range' asks for [0, 10000]; presumably a larger
    # collection is only partially processed -- confirm against the API.
    found = api.findDocuments({
        'query': {
            'conditions': [
                {'key': 'collection', 'value': collection, 'operator': '=='},
            ]
        },
        'keys': ['id', 'title'],
        'sort': [{'key': 'title', 'operator': '+'}],
        'range': [0, 10000],
    })['data']['items']

    for d in found:
        document = api.getDocument({'id': d['id']})['data']
        edit = build_edit(document)
        if edit:
            edit['id'] = d['id']
            print('update', d['id'], d['title'])
            api.editDocument(edit)


if __name__ == '__main__':
    main()