get keyword size
This commit is contained in:
parent
f81fa3a7d6
commit
de5368e47a
2 changed files with 20 additions and 20 deletions
|
@ -3,10 +3,15 @@ import json
|
||||||
import os
|
import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
base = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
keywords = json.load(open(os.path.join(base, 'keywords.json')))
|
||||||
|
ontology = json.load(open(os.path.join(base, 'ontology.json')))
|
||||||
|
|
||||||
def find_path(parent, root=None, path=None):
|
def find_path(parent, root=None, path=None):
|
||||||
if root is None:
|
if root is None:
|
||||||
root = ontology
|
root = ontology
|
||||||
if path == None:
|
if path is None:
|
||||||
path = []
|
path = []
|
||||||
for key in root:
|
for key in root:
|
||||||
if key == parent:
|
if key == parent:
|
||||||
|
@ -16,32 +21,27 @@ def find_path(parent, root=None, path=None):
|
||||||
if r:
|
if r:
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def get_node(name, children):
|
def get_node(name, children, parent=None):
|
||||||
node = {
|
node = {
|
||||||
"size": len(children) + 100,
|
"size": len(children) + 100,
|
||||||
"name": name,
|
"name": name,
|
||||||
"children": [get_node(child, children[child]) for child in children]
|
"children": [get_node(child, children[child], name) for child in children]
|
||||||
}
|
}
|
||||||
if not node['children']:
|
if not node['children']:
|
||||||
del node['children']
|
del node['children']
|
||||||
|
key = '%s: %s' % (parent, name)
|
||||||
|
if key in keywords:
|
||||||
|
node['size'] = keywords[key]
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
base = os.path.abspath(os.path.dirname(__file__))
|
|
||||||
os.chdir(base)
|
os.chdir(base)
|
||||||
|
|
||||||
keywords = json.load(open('keywords.json'))
|
|
||||||
ontology = json.load(open('ontology.json'))
|
|
||||||
|
|
||||||
tree = defaultdict(dict)
|
tree = defaultdict(dict)
|
||||||
|
|
||||||
for keyword in keywords:
|
for keyword in keywords:
|
||||||
if ': ' not in keyword:
|
parent, child = keyword.split(': ')
|
||||||
parent = 'other'
|
|
||||||
child = keyword
|
|
||||||
else:
|
|
||||||
parent, child = keyword.split(': ')
|
|
||||||
path = find_path(parent)
|
path = find_path(parent)
|
||||||
if path:
|
if path:
|
||||||
p = tree
|
p = tree
|
||||||
|
@ -61,7 +61,7 @@ if __name__ == '__main__':
|
||||||
}
|
}
|
||||||
for name in tree:
|
for name in tree:
|
||||||
children = tree[name]
|
children = tree[name]
|
||||||
child = get_node(name, tree[name])
|
child = get_node(name, tree[name], name)
|
||||||
sized_ontology['children'].append(child)
|
sized_ontology['children'].append(child)
|
||||||
|
|
||||||
with open('../static/ontology/sized_ontology.json', 'w') as fd:
|
with open('../static/ontology/sized_ontology.json', 'w') as fd:
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
import collections
|
||||||
import getpass
|
|
||||||
import json
|
import json
|
||||||
import sys
|
|
||||||
|
|
||||||
import ox
|
import ox
|
||||||
import ox.api
|
import ox.api
|
||||||
|
@ -10,7 +8,7 @@ import ox.api
|
||||||
site = 'pandora.cinemusespace.com'
|
site = 'pandora.cinemusespace.com'
|
||||||
api = ox.api.signin('https://%s/api/' % site)
|
api = ox.api.signin('https://%s/api/' % site)
|
||||||
|
|
||||||
keywords = set()
|
keywords = collections.Counter()
|
||||||
for annotation in api.findAnnotations({
|
for annotation in api.findAnnotations({
|
||||||
'query': {
|
'query': {
|
||||||
'conditions': [{
|
'conditions': [{
|
||||||
|
@ -23,8 +21,10 @@ for annotation in api.findAnnotations({
|
||||||
'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
|
'keys': ['id', 'in', 'out', 'value', 'user', 'created'],
|
||||||
'range': [0, 500000]
|
'range': [0, 500000]
|
||||||
})['data']['items']:
|
})['data']['items']:
|
||||||
keywords.add(annotation['value'])
|
keyword = annotation['value']
|
||||||
|
if ': ' not in keyword:
|
||||||
|
keyword = 'other: ' + keyword
|
||||||
|
keywords[keyword] += 1
|
||||||
|
|
||||||
with open('keywords.json', 'w') as fd:
|
with open('keywords.json', 'w') as fd:
|
||||||
json.dump(list(sorted(keywords)), fd, indent=4)
|
json.dump(keywords, fd, indent=4, ensure_ascii=False, sort_keys=True)
|
||||||
|
|
Loading…
Reference in a new issue