#!/usr/bin/python3
"""Tally the values of all 'keywords' layer annotations and save them as JSON.

The script signs in to the API of ``site``, pages through every annotation
whose layer is ``keywords``, counts how often each keyword value occurs and
writes the resulting mapping (keyword -> count) to ``keywords.json`` in the
current working directory.

Values that do not contain ``': '`` are counted under ``'other: <value>'``.
Annotations whose id starts with ``'BA/'`` are skipped.

The script exits with status 1, without writing the output file, if a page
request returns a response without a ``'data'`` entry.
"""
import collections
import json

import ox
import ox.api

site = 'pandora.cinemusespace.com'
api = ox.api.signin('https://%s/api/' % site)

# keyword value -> number of annotations carrying that value
keywords = collections.Counter()

# Select only annotations that belong to the 'keywords' layer.
query = {
    'conditions': [{
        'key': 'layer',
        'value': 'keywords',
        'operator': '=='
    }],
    'operator': '&'
}

# Without 'keys'/'range' the response's 'items' is used as the total number
# of matches (it is compared against the paging position below).
count = api.findAnnotations({'query': query})['data']['items']

position = 0
chunk = 1000
while position < count:
    r = api.findAnnotations({
        'query': query,
        'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
        # A fixed sort order keeps consecutive pages consistent.
        'sort': [{'key': 'public_id', 'operator': '+'}],
        'range': [position, position + chunk]
    })
    if 'data' not in r:
        # Abort explicitly: continuing would only fail with a KeyError on
        # r['data'] below, and skipping the page would silently produce an
        # incomplete tally.
        print('failed', r)
        raise SystemExit(1)
    for annotation in r['data']['items']:
        # NOTE(review): presumably excludes annotations of the item with
        # id 'BA' -- confirm why these are left out.
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        # Keywords without a 'category: ' prefix are grouped under 'other'.
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        keywords[keyword] += 1
    position += chunk

# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding must be pinned to UTF-8 instead of depending on the locale.
with open('keywords.json', 'w', encoding='utf-8') as fd:
    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)