2018-05-31 14:59:17 +00:00
|
|
|
#!/usr/bin/python3
|
2018-05-31 19:32:56 +00:00
|
|
|
import collections
|
2018-05-31 14:59:17 +00:00
|
|
|
import json
|
|
|
|
|
|
|
|
import ox
|
2018-05-31 19:21:34 +00:00
|
|
|
import ox.api
|
2018-05-31 14:59:17 +00:00
|
|
|
|
2019-05-13 11:27:40 +00:00
|
|
|
# Host name of the site whose keyword annotations are tallied.
site = 'cineurban.cinemusespace.com'
# API client for that site; every findAnnotations request in this script
# goes through it.
# NOTE(review): presumably signin() picks up stored credentials - confirm.
api = ox.api.signin('https://%s/api/' % site)

# Running tally: keyword string -> number of annotations carrying it.
keywords = collections.Counter()
|
2018-11-15 15:30:49 +00:00
|
|
|
# Search shared by the count request and by every page request: select the
# annotations whose 'layer' equals 'keywords'.
query = {
    'conditions': [{
        'key': 'layer',
        'value': 'keywords',
        'operator': '==',
    }],
    # Presumably how multiple conditions are combined; only one is listed.
    'operator': '&',
}
|
|
|
|
# Request sent without 'keys'/'range'; its 'items' value is used below as the
# total number of matching annotations (the paging loop's upper bound).
count = api.findAnnotations({'query': query})['data']['items']
# Offset of the next page to request.
position = 0
# Number of annotations requested per page.
chunk = 1000
|
|
|
|
|
|
|
|
# Walk the result set one page of `chunk` annotations at a time and count how
# often each keyword value occurs.
while position < count:
    r = api.findAnnotations({
        'query': query,
        # Only 'id' and 'value' are read below; the other keys are requested
        # but unused by this script.
        'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
        # Same sort key on every request so consecutive ranges line up.
        'sort': [{'key': 'public_id', 'operator': '+'}],
        'range': [position, position + chunk],
    })
    if 'data' not in r:
        print('failed', r)
        # Stop here: falling through would raise an unrelated KeyError on
        # r['data'], and skipping the page would produce a silently
        # incomplete tally.
        raise SystemExit(1)
    for annotation in r['data']['items']:
        # Annotations whose id starts with 'BA/' are left out of the tally.
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        # Keywords without a 'category: ' prefix are filed under 'other'.
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        keywords[keyword] += 1
    position += chunk
|
2018-05-31 14:59:17 +00:00
|
|
|
|
|
|
|
# Dump the tally as pretty-printed JSON with keys in sorted order.
# ensure_ascii=False writes non-ASCII keywords as raw characters, so the file
# encoding is pinned to UTF-8 rather than left to the platform's locale
# default (which can fail with UnicodeEncodeError or yield non-UTF-8 output).
with open('keywords.json', 'w', encoding='utf-8') as fd:
    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)
|