From c0cff7c541a46e8379806487fb435b14d5db3b5f Mon Sep 17 00:00:00 2001
From: j
Date: Sat, 8 Jun 2019 10:32:39 +0100
Subject: [PATCH] add cleanup example

---
Note: the content of cleanup_collection.py was revised after this patch
was generated, so the blob id on its "index" line refers to the original
version of the file.

 cleanup_collection.py     | 61 +++++++++++++++++++++++++++++++++++++++
 rename_document_series.py |  1 +
 2 files changed, 62 insertions(+)
 create mode 100755 cleanup_collection.py

diff --git a/cleanup_collection.py b/cleanup_collection.py
new file mode 100755
index 0000000..998d450
--- /dev/null
+++ b/cleanup_collection.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python3
+"""Clean up the titles and dates of all documents in a collection.
+
+usage: cleanup_collection.py SITE COLLECTION
+
+Removes '_ocr' and underscores from each title, uses a four-digit number
+in the title as the date when none is set, and drops the date from the
+title.
+"""
+import sys
+import re
+import ox.api
+
+# Exactly four digits, not part of a longer number.
+YEAR = re.compile(r'(?<!\d)\d{4}(?!\d)')
+
+if len(sys.argv) != 3:
+    print('usage: %s SITE COLLECTION' % sys.argv[0])
+    sys.exit(1)
+
+site = sys.argv[1]
+collection = sys.argv[2]
+
+api = ox.api.signin(site)
+
+for d in api.findDocuments({
+    'query': {
+        'conditions': [{'key': 'collection', 'value': collection, 'operator': '=='}]
+    },
+    'keys': ['id', 'title'],
+    'sort': [{'key': 'title', 'operator': '+'}],
+    'range': [0, 10000]
+})['data']['items']:
+
+    document = api.getDocument({
+        'id': d['id']
+    })['data']
+    # all edits here
+    edit = {}
+    title = document['title']
+
+    title = title.replace('_ocr', '')
+    title = title.replace('_', ' ').strip()
+    if title != document['title']:
+        edit['title'] = title
+
+    # Use the first four-digit number in the title as date if none is set.
+    year = YEAR.findall(document['title'])
+    if year and not document.get('date'):
+        document['date'] = edit['date'] = year[0]
+
+    # The date has its own field, so drop it from the title.
+    if document.get('date') and document['date'] in title:
+        title = title.replace(document['date'], '').strip()
+        edit['title'] = title
+
+    # Only send an update if something changed.
+    if edit:
+        edit['id'] = d['id']
+        print('update', d['id'], d['title'])
+        api.editDocument(edit)
diff --git a/rename_document_series.py b/rename_document_series.py
index d2a29b1..7b2a518 100755
--- a/rename_document_series.py
+++ b/rename_document_series.py
@@ -4,6 +4,7 @@ import ox.api
 
 if len(sys.argv) != 4:
     print('usage: %s ' % sys.argv[0])
+    sys.exit(1)
 
 site = sys.argv[1]
 old_prefix = sys.argv[2]