From 2de2e27975a902b7fd80e0ed2557b0b6e3ae6157 Mon Sep 17 00:00:00 2001 From: j Date: Sun, 28 Jun 2020 17:23:51 +0200 Subject: [PATCH] add volume --- cleanup/add_date.py | 2 ++ cleanup/add_volume.py | 31 ++++++++++++++++++------------- cleanup/search_replace.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 13 deletions(-) create mode 100755 cleanup/search_replace.py diff --git a/cleanup/add_date.py b/cleanup/add_date.py index b51476b..47af0e0 100644 --- a/cleanup/add_date.py +++ b/cleanup/add_date.py @@ -73,6 +73,8 @@ for doc in titles: ('[^\d]([^, (-]+? \d+, \d{4})', '%B %d, %Y', '%Y-%m-%d'), + ('([^\d]{3} \d+ \d{4})', '%b %d %Y', '%Y-%m-%d'), + ('[^\d]([^,]{3}, \d{4})', '%b, %Y', '%Y-%m'), ('[^\d]([^,]+?, \d{4})', '%B, %Y', '%Y-%m'), ('[^\d]([^ (]+?, \d{4})', '%B, %Y', '%Y-%m'), diff --git a/cleanup/add_volume.py b/cleanup/add_volume.py index 26d3eb5..7fd8203 100755 --- a/cleanup/add_volume.py +++ b/cleanup/add_volume.py @@ -17,26 +17,31 @@ else: conditions = [] for regex, fragments in ( - ['Vol. \d+, No. \d+', ['Vol.', 'No.']], - ['Vol \d+ no \d+', [' Vol ', ' no ']], - ['Vol:\d+ #\d+', ['Vol:', '#']], + ['Vol. (\d+), No. (\d+)', ['Vol.', 'No.']], + ['Vol (\d+) no (\d+)', [' Vol ', ' no ']], + ['Vol:(\d+) #(\d+)', ['Vol:', '#']], + #['no (\d+) (\d+)-', [' no ']], ): + query = { + 'conditions': [ + {'key': 'title', 'operator': '=', 'value': fragment} + for fragment in fragments + ] + conditions, + 'operator': '&' + } for doc in api.findDocuments({ - 'query': { - 'conditions': [ - {'key': 'title', 'operator': '=', 'value': fragment} - for fragment in fragments - ] + conditions, - 'operator': '&' - }, - 'keys': ['id', 'title', 'volume'], + 'query': query, + 'keys': ['id', 'title', 'volume', 'issue'], 'range': [0, 10000] })['data']['items']: - if 'Vol' not in doc.get('volume', ''): + if not doc.get('volume', ''): m = re.compile(regex).findall(doc['title']) if m: print(m[0], doc['id'], doc['title']) + volume = m[0][0] + issue = m[0][1] api.editDocument({ 'id': doc['id'], - 'volume': m[0].strip() + 'volume': volume, + 'issue': issue, }) diff --git a/cleanup/search_replace.py b/cleanup/search_replace.py new file mode 100755 index 0000000..88b0b54 --- /dev/null +++ b/cleanup/search_replace.py @@ -0,0 +1,33 @@ +#!/usr/bin/python3 +import re +import sys + +import ox +import ox.api + +site = 'archive.leftove.rs' +api = ox.api.signin('https://%s/api/' % site) + + +collection = sys.argv[1] +search = sys.argv[2] +replace = sys.argv[3] + +for doc in api.findDocuments({ + 'query': { + 'conditions': [ + {'key': 'collection', 'operator': '=', 'value': collection} + ], + 'operator': '&' + }, + 'keys': ['id', 'title'], + 'range': [0, 10000] +})['data']['items']: + title = doc['title'].replace(search, replace) + if title != doc['title']: + print(doc['id'], doc['title']) + print('\t->', title) + api.editDocument({ + 'id': doc['id'], + 'title': title + })