add volume
This commit is contained in:
parent
bab1c3859a
commit
2de2e27975
3 changed files with 53 additions and 13 deletions
|
@ -73,6 +73,8 @@ for doc in titles:
|
|||
|
||||
('[^\d]([^, (-]+? \d+, \d{4})', '%B %d, %Y', '%Y-%m-%d'),
|
||||
|
||||
('([^\d]{3} \d+ \d{4})', '%b %d %Y', '%Y-%m-%d'),
|
||||
|
||||
('[^\d]([^,]{3}, \d{4})', '%b, %Y', '%Y-%m'),
|
||||
('[^\d]([^,]+?, \d{4})', '%B, %Y', '%Y-%m'),
|
||||
('[^\d]([^ (]+?, \d{4})', '%B, %Y', '%Y-%m'),
|
||||
|
|
|
@ -17,26 +17,31 @@ else:
|
|||
conditions = []
|
||||
|
||||
for regex, fragments in (
|
||||
['Vol. \d+, No. \d+', ['Vol.', 'No.']],
|
||||
['Vol \d+ no \d+', [' Vol ', ' no ']],
|
||||
['Vol:\d+ #\d+', ['Vol:', '#']],
|
||||
['Vol. (\d+), No. (\d+)', ['Vol.', 'No.']],
|
||||
['Vol (\d+) no (\d+)', [' Vol ', ' no ']],
|
||||
['Vol:(\d+) #(\d+)', ['Vol:', '#']],
|
||||
#['no (\d+) (\d+)-', [' no ']],
|
||||
):
|
||||
for doc in api.findDocuments({
|
||||
'query': {
|
||||
query = {
|
||||
'conditions': [
|
||||
{'key': 'title', 'operator': '=', 'value': fragment}
|
||||
for fragment in fragments
|
||||
] + conditions,
|
||||
'operator': '&'
|
||||
},
|
||||
'keys': ['id', 'title', 'volume'],
|
||||
}
|
||||
for doc in api.findDocuments({
|
||||
'query': query,
|
||||
'keys': ['id', 'title', 'volume', 'issue'],
|
||||
'range': [0, 10000]
|
||||
})['data']['items']:
|
||||
if 'Vol' not in doc.get('volume', ''):
|
||||
if not doc.get('volume', ''):
|
||||
m = re.compile(regex).findall(doc['title'])
|
||||
if m:
|
||||
print(m[0], doc['id'], doc['title'])
|
||||
volume = m[0][0]
|
||||
issue = m[0][1]
|
||||
api.editDocument({
|
||||
'id': doc['id'],
|
||||
'volume': m[0].strip()
|
||||
'volume': volume,
|
||||
'issue': issue,
|
||||
})
|
||||
|
|
33
cleanup/search_replace.py
Executable file
33
cleanup/search_replace.py
Executable file
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/python3
"""Replace a literal substring in the titles of all documents of a collection.

Usage: search_replace.py <collection> <search> <replace>

Every document whose 'collection' key matches <collection> is fetched, and each
title that contains <search> is rewritten with <replace> substituted for every
occurrence. Each change is printed before it is sent to the API.
"""
import re
import sys

import ox
import ox.api

site = 'archive.leftove.rs'


def main(argv):
    """Run the search/replace for the given command line.

    argv is the full argument vector (program name first). Returns the
    process exit status: 0 on success, 2 on a usage error.
    """
    # Validate the command line before signing in, so a typo does not cost
    # a network round trip and then fail with a bare IndexError.
    if len(argv) != 4:
        print('usage: %s <collection> <search> <replace>' % argv[0],
              file=sys.stderr)
        return 2
    collection, search, replace = argv[1:4]

    # str.replace('', x) inserts x between every character, which would
    # rewrite (and corrupt) every single title in the collection.
    if not search:
        print('error: <search> must not be empty', file=sys.stderr)
        return 2

    api = ox.api.signin('https://%s/api/' % site)

    response = api.findDocuments({
        'query': {
            'conditions': [
                {'key': 'collection', 'operator': '=', 'value': collection}
            ],
            'operator': '&'
        },
        'keys': ['id', 'title'],
        # NOTE(review): presumably limits the result to the first 10000
        # documents; larger collections would need paging - confirm.
        'range': [0, 10000]
    })

    for doc in response['data']['items']:
        title = doc['title'].replace(search, replace)
        if title == doc['title']:
            # Nothing to change for this document.
            continue
        print(doc['id'], doc['title'])
        print('\t->', title)
        api.editDocument({
            'id': doc['id'],
            'title': title
        })
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
Loading…
Reference in a new issue