load annotations in chunks
This commit is contained in:
parent
0b5d56ed94
commit
ebf2ba4dbd
1 changed file with 23 additions and 12 deletions
|
@ -9,24 +9,35 @@ site = 'pandora.cinemusespace.com'
|
|||
api = ox.api.signin('https://%s/api/' % site)

# Tally of keyword annotation values; values without a "category: " prefix
# are filed under "other: ".
keywords = collections.Counter()

# Select every annotation whose layer is 'keywords'.
query = {
    'conditions': [{
        'key': 'layer',
        'value': 'keywords',
        'operator': '=='
    }],
    'operator': '&'
}

# Requested without 'keys', the response's data.items is used as the total
# number of matches (presumably a count rather than a list -- confirm
# against the API documentation).
count = api.findAnnotations({'query': query})['data']['items']
position = 0
chunk = 1000

# Fetch the annotations in chunks instead of one huge request. A fixed sort
# order is requested so consecutive ranges refer to the same ordering.
while position < count:
    r = api.findAnnotations({
        'query': query,
        'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
        'sort': [{'key': 'public_id', 'operator': '+'}],
        'range': [position, position + chunk]
    })
    if 'data' not in r:
        # Report the raw response, then stop: continuing would only fail
        # with an unrelated KeyError on r['data'] below.
        print('failed', r)
        raise RuntimeError(
            'findAnnotations failed for range starting at %d' % position
        )
    for annotation in r['data']['items']:
        # Annotations whose id starts with 'BA/' are excluded from the tally.
        if annotation['id'].startswith('BA/'):
            continue
        keyword = annotation['value']
        if ': ' not in keyword:
            keyword = 'other: ' + keyword
        keywords[keyword] += 1
    position += chunk

# ensure_ascii=False emits non-ASCII characters verbatim, so the file is
# opened explicitly as UTF-8 rather than with the platform default encoding.
with open('keywords.json', 'w', encoding='utf-8') as fd:
    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)
|
||||
|
|
Loading…
Reference in a new issue