import re
import os
import simplejson
# Build a cross-reference of the dictionary's HTML entries.
#
# Every "<name>.html" file found under `path` is one entry, except files whose
# name starts with "_" (this script writes "_index.html" there itself). For
# each entry the script extracts its display term and the "#name" links it
# contains, derives the reverse links, writes a listing to "_index.html" and
# dumps the whole structure to "../json/dictionary.json".

# Entry name -> {"back": names linking here, "forward": names linked to,
#                "term": text captured by _TERM_RE ("" when absent)}.
entries = {}
path = "../html/"

# Compiled once here rather than once per entry inside the loop.
_TERM_RE = re.compile('"bold large">(.*?)<')
_LINK_RE = re.compile('"#(.*?)"')

# Entry name -> file it was discovered in. Kept outside `entries` so the JSON
# dump keeps its original shape. os.walk() descends into subdirectories, so
# the file must be reopened from where it was found, not from `path`.
_sources = {}

for dirname, _dirs, files in os.walk(path):
    for filename in files:
        # Split at the FIRST dot, as before: "a.html" is an entry, while
        # "a.b.html" and "a.html.bak" are not. Files without a dot and
        # dotfiles (empty name) are skipped instead of raising IndexError.
        name, _dot, extension = filename.partition(".")
        if extension != "html" or not name or name.startswith("_"):
            continue
        entries[name] = {
            "back": [],
            "forward": [],
            "term": "",
        }
        _sources[name] = os.path.join(dirname, filename)

# Visit entries in sorted order so the "back" lists are built in a
# reproducible order regardless of directory listing order.
for entry in sorted(entries):
    with open(_sources[entry]) as source_file:
        html = source_file.read()

    # A page without the term markup keeps the "" default.
    term_match = _TERM_RE.search(html)
    if term_match:
        entries[entry]["term"] = term_match.group(1)

    # De-duplicate the links and keep only those pointing at known entries:
    # a dangling target (e.g. href="#") has no record to update and no term
    # to print in the index. Sorting makes the output stable between runs.
    linked = set(_LINK_RE.findall(html))
    entries[entry]["forward"] = sorted(
        target for target in linked if target in entries
    )
    for target in entries[entry]["forward"]:
        entries[target]["back"].append(entry)


def _terms(names):
    """Return the terms of the given entries, ordered by entry name, comma-joined."""
    return ", ".join(entries[name]["term"] for name in sorted(names))


# One group per entry: term, outgoing links (">"), incoming links ("<"),
# followed by a blank separator line.
with open(path + "_index.html", "w") as index_file:
    for entry in sorted(entries):
        record = entries[entry]
        index_file.write(record["term"] + "\n")
        index_file.write("> " + _terms(record["forward"]) + "\n")
        index_file.write("< " + _terms(record["back"]) + "\n")
        index_file.write("\n")

with open("../json/dictionary.json", "w") as json_file:
    json_file.write(simplejson.dumps(entries))