#!/usr/bin/python3
import collections
import json

import ox
import ox.api

# Host whose /api/ endpoint this script signs in to and queries.
site = 'pandora.cinemusespace.com'
api = ox.api.signin('https://%s/api/' % site)

# Paging state for the fetch loop below: current offset and page size.
position = 0
chunk = 1000

# Running tally of keyword strings, filled in by the fetch loop below.
keywords = collections.Counter()

# Select annotations whose 'layer' equals 'keywords'.
query = {
    'conditions': [
        {'key': 'layer', 'value': 'keywords', 'operator': '=='},
    ],
    'operator': '&',
}

# Issued without 'keys' or 'range'; the 'items' field of the response is
# used as the upper bound for paging (presumably the total number of
# matching annotations -- confirm against the API documentation).
count = api.findAnnotations({'query': query})['data']['items']

# Page through the matching annotations, `chunk` positions per request, and
# tally each annotation's keyword value into `keywords`.
while position < count:
    r = api.findAnnotations({
        'query': query,
        'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
        # Same sort on every request (presumably so that consecutive
        # ranges do not overlap or skip items -- confirm).
        'sort': [{'key': 'public_id', 'operator': '+'}],
        'range': [position, position+chunk]
    })
    if 'data' not in r:
        # A response without 'data' cannot be processed. Abort with the
        # response attached instead of falling through to an unrelated
        # KeyError on r['data'] below.
        raise RuntimeError('findAnnotations failed: %r' % (r,))
    for annotation in r['data']['items']:
        # Annotations whose id starts with 'BA/' are excluded from the tally.
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        # Values without a 'category: ' style prefix are filed under 'other'.
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        keywords[keyword] += 1
    position += chunk

# Write the keyword tally as pretty-printed JSON with sorted keys.
# ensure_ascii=False emits non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the locale default, which
# could raise UnicodeEncodeError or produce a non-UTF-8 file.
with open('keywords.json', 'w', encoding='utf-8') as fd:
    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)