# Patch summary (text before the first "diff --git" header is skipped by patch tools).
#
# ontology/update.py
#   - load keywords.json and ontology.json at module level, relative to the script directory
#   - get_node() takes an optional parent and uses keywords['<parent>: <name>'] as the
#     node size when that key exists
#   - keywords whose parent is not found in the ontology are collected under tree['missing']
#   - keywords are split on the first ': ' only
# ontology/update_keywords.py
#   - sign in through ox.api.signin instead of the hand-rolled credential prompt
#   - write per-keyword counts (collections.Counter) instead of a sorted list of unique
#     keywords; keywords without ': ' are stored as 'other: <keyword>'
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. Indentation and
# the position of blank context lines were reconstructed to agree with the hunk line
# counts -- verify against the repository (or apply with --ignore-whitespace) before use.
diff --git a/ontology/update.py b/ontology/update.py
index 47125c6..de51c31 100755
--- a/ontology/update.py
+++ b/ontology/update.py
@@ -3,10 +3,15 @@ import json
 import os
 from collections import defaultdict
 
+base = os.path.abspath(os.path.dirname(__file__))
+
+keywords = json.load(open(os.path.join(base, 'keywords.json')))
+ontology = json.load(open(os.path.join(base, 'ontology.json')))
+
 def find_path(parent, root=None, path=None):
     if root is None:
         root = ontology
-    if path == None:
+    if path is None:
         path = []
     for key in root:
         if key == parent:
@@ -16,24 +21,23 @@ def find_path(parent, root=None, path=None):
             if r:
                 return r
 
-def get_node(name, children):
+def get_node(name, children, parent=None):
     node = {
         "size": len(children) + 100,
         "name": name,
-        "children": [get_node(child, children[child]) for child in children]
+        "children": [get_node(child, children[child], name) for child in children]
     }
     if not node['children']:
         del node['children']
+    key = '%s: %s' % (parent, name)
+    if key in keywords:
+        node['size'] = keywords[key]
     return node
 
 
 if __name__ == '__main__':
-    base = os.path.abspath(os.path.dirname(__file__))
     os.chdir(base)
 
-    keywords = json.load(open('keywords.json'))
-    ontology = json.load(open('ontology.json'))
-
     tree = defaultdict(dict)
 
     for keyword in keywords:
@@ -41,7 +45,7 @@ if __name__ == '__main__':
             parent = 'other'
             child = keyword
         else:
-            parent, child = keyword.split(': ')
+            parent, child = keyword.split(': ', 1)
         path = find_path(parent)
         if path:
             p = tree
@@ -51,6 +55,9 @@ if __name__ == '__main__':
                 p = p[part]
             p[child] = {}
         else:
+            if parent not in tree['missing']:
+                tree['missing'][parent] = {}
+            tree['missing'][parent][child] = {}
             print('missing root - %s: %s' % (parent, child))
 
     #print(json.dumps(tree, indent=4, sort_keys=True))
@@ -61,7 +68,7 @@ if __name__ == '__main__':
     }
     for name in tree:
         children = tree[name]
-        child = get_node(name, tree[name])
+        child = get_node(name, tree[name], name)
         sized_ontology['children'].append(child)
 
     with open('../static/ontology/sized_ontology.json', 'w') as fd:
diff --git a/ontology/update_keywords.py b/ontology/update_keywords.py
index f0a5b07..0a16852 100755
--- a/ontology/update_keywords.py
+++ b/ontology/update_keywords.py
@@ -1,32 +1,14 @@
 #!/usr/bin/python3
-
-import getpass
+import collections
 import json
-import sys
 
 import ox
-import ox.web.auth
+import ox.api
 
 site = 'pandora.cinemusespace.com'
-api = ox.API('https://%s/api/' % site)
-update = False
-try:
-    credentials = ox.web.auth.get(site)
-except:
-    credentials = {}
-    print('Please provide your username and password for %s:' % site)
-    credentials['username'] = input('Username: ')
-    credentials['password'] = getpass.getpass('Password: ')
-    update = True
-r = api.signin(**credentials)
-if 'errors' in r.get('data', {}):
-    for kv in r['data']['errors'].items():
-        print('%s: %s' % kv)
-    sys.exit(1)
-if update:
-    ox.web.auth.update(site, credentials)
+api = ox.api.signin('https://%s/api/' % site)
 
-keywords = set()
+keywords = collections.Counter()
 for annotation in api.findAnnotations({
     'query': {
         'conditions': [{
@@ -39,8 +21,10 @@ for annotation in api.findAnnotations({
     'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
     'range': [0, 500000]
 })['data']['items']:
-    keywords.add(annotation['value'])
-
+    keyword = annotation['value']
+    if ': ' not in keyword:
+        keyword = 'other: ' + keyword
+    keywords[keyword] += 1
 
 with open('keywords.json', 'w') as fd:
-    json.dump(list(sorted(keywords)), fd, indent=4)
+    json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)