load annotations in chunks

2018-11-15 15:30:49 +00:00 · 2018-11-15 15:30:49 +00:00 · ebf2ba4dbd
commit ebf2ba4dbd
parent 0b5d56ed94
1 changed file with 23 additions and 12 deletions
--- a/ontology/update_keywords.py
+++ b/ontology/update_keywords.py
@@ -9,24 +9,35 @@ site = 'pandora.cinemusespace.com'
 api = ox.api.signin('https://%s/api/' % site)

 keywords = collections.Counter()
-for annotation in api.findAnnotations({
-    'query': {
+query = {
        'conditions': [{
            'key': 'layer',
            'value': 'keywords',
            'operator': '=='
        }],
        'operator': '&'
-    },
+}
+count = api.findAnnotations({'query': query})['data']['items']
+position = 0
+chunk = 1000
+
+while position < count:
+    r = api.findAnnotations({
+        'query': query,
        'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
-    'range': [0, 500000]
-})['data']['items']:
+        'sort': [{'key': 'public_id', 'operator': '+'}],
+        'range': [position, position+chunk]
+    })
+    if 'data' not in r:
+        print('failed', r)
+    for annotation in r['data']['items']:
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        keywords[keyword] += 1
+    position += chunk

 with open('keywords.json', 'w') as fd:
    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)