add volume

This commit is contained in:
j 2020-06-28 17:23:51 +02:00
parent bab1c3859a
commit 2de2e27975
3 changed files with 53 additions and 13 deletions

View File

@ -73,6 +73,8 @@ for doc in titles:
('[^\d]([^, (-]+? \d+, \d{4})', '%B %d, %Y', '%Y-%m-%d'),
('([^\d]{3} \d+ \d{4})', '%b %d %Y', '%Y-%m-%d'),
('[^\d]([^,]{3}, \d{4})', '%b, %Y', '%Y-%m'),
('[^\d]([^,]+?, \d{4})', '%B, %Y', '%Y-%m'),
('[^\d]([^ (]+?, \d{4})', '%B, %Y', '%Y-%m'),

View File

@ -17,26 +17,31 @@ else:
conditions = []
for regex, fragments in (
['Vol. \d+, No. \d+', ['Vol.', 'No.']],
['Vol \d+ no \d+', [' Vol ', ' no ']],
['Vol:\d+ #\d+', ['Vol:', '#']],
['Vol. (\d+), No. (\d+)', ['Vol.', 'No.']],
['Vol (\d+) no (\d+)', [' Vol ', ' no ']],
['Vol:(\d+) #(\d+)', ['Vol:', '#']],
#['no (\d+) (\d+)-', [' no ']],
):
query = {
'conditions': [
{'key': 'title', 'operator': '=', 'value': fragment}
for fragment in fragments
] + conditions,
'operator': '&'
}
for doc in api.findDocuments({
'query': {
'conditions': [
{'key': 'title', 'operator': '=', 'value': fragment}
for fragment in fragments
] + conditions,
'operator': '&'
},
'keys': ['id', 'title', 'volume'],
'query': query,
'keys': ['id', 'title', 'volume', 'issue'],
'range': [0, 10000]
})['data']['items']:
if 'Vol' not in doc.get('volume', ''):
if not doc.get('volume', ''):
m = re.compile(regex).findall(doc['title'])
if m:
print(m[0], doc['id'], doc['title'])
volume = m[0][0]
issue = m[0][1]
api.editDocument({
'id': doc['id'],
'volume': m[0].strip()
'volume': volume,
'issue': issue,
})

33
cleanup/search_replace.py Executable file
View File

@ -0,0 +1,33 @@
#!/usr/bin/python3
import re
import sys
import ox
import ox.api
# Replace a literal substring in the titles of all documents of one
# collection and write every changed title back through the API.
#
# usage: search_replace.py <collection> <search> <replace>
#
# Arguments are validated before signing in, so a bad invocation exits with
# a usage message and never touches the API.
if len(sys.argv) < 4:
    sys.exit('usage: %s <collection> <search> <replace>' % sys.argv[0])
collection, search, replace = sys.argv[1:4]
if not search:
    # str.replace('', x) inserts x between every character, which would
    # rewrite every single title in the collection.
    sys.exit('error: <search> must not be empty')

site = 'archive.leftove.rs'
api = ox.api.signin('https://%s/api/' % site)

# NOTE(review): 'range' is passed as [0, 10000]; presumably this caps the
# result window, so larger collections may be only partially processed --
# confirm against the API.
for doc in api.findDocuments({
    'query': {
        'conditions': [
            {'key': 'collection', 'operator': '=', 'value': collection}
        ],
        'operator': '&'
    },
    'keys': ['id', 'title'],
    'range': [0, 10000]
})['data']['items']:
    title = doc['title'].replace(search, replace)
    # Only issue an edit when the replacement actually changed the title.
    if title != doc['title']:
        print(doc['id'], doc['title'])
        print('\t->', title)
        api.editDocument({
            'id': doc['id'],
            'title': title
        })