# Host of the pan.do/ra instance whose keyword annotations are counted.
site = 'pandora.cinemusespace.com'

# Number of annotations requested per findAnnotations call.
chunk = 1000

# Selects every annotation stored on the 'keywords' layer.
query = {
    'conditions': [{
        'key': 'layer',
        'value': 'keywords',
        'operator': '=='
    }],
    'operator': '&'
}


def response_data(response):
    """Return the 'data' payload of an API response.

    Raises:
        RuntimeError: if the response has no 'data' key. The full response
            is included in the message so the failure can be diagnosed.
    """
    if 'data' not in response:
        # Abort instead of continuing: a skipped chunk would silently
        # produce incomplete counts in keywords.json.
        raise RuntimeError('findAnnotations failed: %r' % (response,))
    return response['data']


def iter_keywords(annotations):
    """Yield the normalized keyword of each annotation that is counted.

    Annotations whose id starts with 'BA/' are skipped. Values that do not
    contain ': ' are prefixed with 'other: '.
    """
    for annotation in annotations:
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        yield keyword


def main():
    """Count all keyword annotations on the site and write keywords.json."""
    api = ox.api.signin('https://%s/api/' % site)
    keywords = collections.Counter()

    # A request without 'keys' is used to obtain the total; the script
    # treats data['items'] of that response as a number.
    # NOTE(review): presumably the API returns the match count here - confirm.
    count = response_data(api.findAnnotations({'query': query}))['items']

    # Fetch in fixed-size chunks rather than one huge range.
    for position in range(0, count, chunk):
        data = response_data(api.findAnnotations({
            'query': query,
            'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
            # NOTE(review): the explicit sort is presumably what keeps
            # consecutive ranges from overlapping - confirm against the API.
            'sort': [{'key': 'public_id', 'operator': '+'}],
            'range': [position, position + chunk]
        }))
        keywords.update(iter_keywords(data['items']))

    # ensure_ascii=False writes non-ASCII keywords verbatim, so the file
    # encoding is fixed to UTF-8 instead of depending on the locale.
    with open('keywords.json', 'w', encoding='utf-8') as fd:
        json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)


if __name__ == '__main__':
    main()