# pandora_cms/ontology/ontology.py (98 lines, 3 KiB, Python)

import getpass
import json
import sys
from html import escape

import ox
import ox.web.auth
def parse_nodes(parents, nodes):
    """Turn a nested ontology mapping into a tree of fully-qualified names.

    Args:
        parents: ': '-joined path of the ancestors of ``nodes``; an empty
            string at the root.
        nodes: mapping of node name -> mapping of that node's children
            (same shape, recursively; an empty mapping marks a leaf).

    Returns:
        A list with one ``{'name': full_path, 'children': [...]}`` dict per
        entry of ``nodes``, in the mapping's iteration order.

    Side effects:
        Adds the full path of every leaf (a node whose parsed children list
        is empty) to the module-level ``strings`` set.
    """
    # ``strings`` is only mutated, never rebound, so no ``global`` statement
    # is required to reach the module-level set.
    parsed = []
    for name, children in nodes.items():
        full_name = name if not parents else '{}: {}'.format(parents, name)
        children = parse_nodes(full_name, children)
        if not children:
            strings.add(full_name)
        parsed.append({
            'name': full_name,
            'children': children
        })
    return parsed
# get strings ("a: b: c: d") and parents ({"c": "a: b"})
strings = set()
# Use a context manager so the file handle is closed deterministically;
# JSON text is UTF-8 by specification.
with open('ontology.json', encoding='utf-8') as f:
    data = json.load(f)
# parse_nodes() fills ``strings`` with the full path of every leaf.
parse_nodes('', data)
# Map the second-to-last segment of each leaf path to the path above it.
parents = {}
for string in sorted(strings):
    parts = string.split(': ')
    if len(parts) < 2:
        # A top-level leaf has no parent segment; indexing parts[-2] would
        # raise IndexError.
        continue
    grandparent = ': '.join(parts[:-2])
    parent = parts[-2]
    parents[parent] = grandparent
# sign in
site = 'pandora.cinemusespace.com'
api = ox.API('https://{}/api/'.format(site))
# Set when credentials were typed in interactively and still need saving.
update = False
try:
    credentials = ox.web.auth.get(site)
except Exception:
    # No stored credentials could be loaded: ask for them interactively.
    # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
    # SystemExit are not mistaken for "no credentials".
    print('Please enter your username and password for {}:'.format(site))
    credentials = {}
    credentials['username'] = input('Username: ')
    credentials['password'] = getpass.getpass('Password: ')
    update = True
r = api.signin(**credentials)
if 'errors' in r.get('data', {}):
    # Show the raw response, then each error on its own line, and abort.
    print(r['data'])
    for kv in r['data']['errors'].items():
        print('{}: {}'.format(*kv))
    sys.exit(1)
if update:
    # Persist the credentials only after a sign-in without errors.
    ox.web.auth.update(site, credentials)
# find keywords, add strings, count items for children and parents
child_items = {}   # full path string -> item count of that keyword
parent_items = {}  # first segment of the keyword name -> summed item count
keywords = api.find({
    'group': 'keywords',
    'query': {
        'conditions': [],
        'operator': '&'
    },
    'range': [0, 1000000],
    'sort': [{'key': 'name', 'operator': '-'}]
})['data']['items']
for keyword in keywords:
    name = keyword['name']
    if not name:
        continue
    parent = name.split(': ')[0]
    if parent in parents:
        grandparent = parents[parent]
        # An empty grandparent means the ontology path is just the keyword
        # name; prefixing it would yield ': a: b', which never matches the
        # ontology string 'a: b'.
        string = '{}: {}'.format(grandparent, name) if grandparent else name
    else:
        # Keywords that are not part of the ontology are filed under
        # 'unknown'.
        string = '{}: {}'.format('unknown', name)
    # NOTE(review): the nesting of this check was reconstructed from
    # flattened text; it only fires when the name starts with ': '.
    if not parent:
        parent = 'unknown'
    strings.add(string)
    child_items[string] = keyword['items']
    parent_items[parent] = parent_items.get(parent, 0) + keyword['items']
# create html
html = []
for string in sorted(strings):
    parts = string.split(': ')
    # A single-segment string has no parent segment; guard before indexing
    # so parts[-2] cannot raise IndexError.
    parent = parts[-2] if len(parts) > 1 else None
    has_parent = parent is not None and parent != 'unknown'
    # Keyword names come from the remote API, so escape every segment before
    # it is placed into markup (both as link text and inside href values).
    parts_ = [escape(p) for p in parts]
    if has_parent and parent in parent_items:
        # Link the parent segment to all clips whose keyword starts with it;
        # the title carries the summed item count.
        parts_[-2] = '<a href="https://{}/clip/text/keywords={}:*" target="_blank" title="{}">{}</a>'.format(
            site, escape(parent.replace(' ', '_')),
            parent_items[parent], escape(parent)
        )
    if string in child_items:
        # Link the leaf to the exact keyword; keywords filed under 'unknown'
        # carry no parent prefix in the URL.
        parts_[-1] = '<a href="https://{}/clip/text/keywords=={}{}" target="_blank" title="{}">{}</a>'.format(
            site, '{}:_'.format(escape(parent.replace(' ', '_'))) if has_parent else '',
            escape(parts[-1].replace(' ', '_')), child_items[string], escape(parts[-1])
        )
    html.append(': '.join(parts_))
# Context managers close both files even if reading or writing fails.
with open('ontology_template.html') as f:
    template = f.read()
with open('ontology.html', 'w') as f:
    f.write(template.replace('{TREE}', '<br>\n '.join(html)))