"""Build the cross-reference index and JSON dump for the dictionary pages.

Every ``<name>.html`` page below ``path`` whose name does not start with an
underscore is one dictionary entry.  For each entry the script extracts the
displayed term and the ``"#name"`` links to other entries, derives the reverse
("back") links, and writes two files: a plain listing in ``_index.html`` and
the complete mapping in ``../json/dictionary.json``.
"""
import os
import re

try:
    import simplejson
except ImportError:
    # The standard-library json module offers the same dumps() call, so the
    # script still runs where simplejson is not installed.
    import json as simplejson

path = "../html/"

# Text that follows the `"bold large">` marker, up to the next "<".
TERM_RE = re.compile('"bold large">(.*?)<')
# Target name inside a quoted "#name" reference.
LINK_RE = re.compile('"#(.*?)"')


def find_pages(root):
    """Return a dict mapping entry name to the path of its HTML page.

    A file qualifies when it is named exactly ``<name>.html`` with a
    non-empty ``<name>`` that contains no dot and does not start with "_".
    When the same name occurs in several directories the first one met wins;
    os.walk is top-down, so a page directly inside *root* takes precedence.
    """
    pages = {}
    for dirname, dirs, files in os.walk(root):
        dirs.sort()  # visit sub-directories in a reproducible order
        for filename in sorted(files):
            stem, _, extension = filename.partition(".")
            # Skips dotfiles, extension-less files, "_"-prefixed helper pages
            # and names such as "a.html.bak" without raising IndexError.
            if not stem or stem.startswith("_") or extension != "html":
                continue
            pages.setdefault(stem, os.path.join(dirname, filename))
    return pages


def parse_page(html):
    """Return ``(term, links)`` extracted from one page's HTML text.

    *term* is the first TERM_RE match, or None when there is none; *links* is
    the set of distinct LINK_RE targets found in the text.
    """
    terms = TERM_RE.findall(html)
    term = terms[0] if terms else None
    return term, set(LINK_RE.findall(html))


def link_entries(parsed):
    """Build the entries mapping from ``{name: (term, links)}``.

    Each entry is a dict with the keys "back", "forward" and "term".
    "forward" keeps only links that name a known entry (in-page anchors such
    as "#" would otherwise raise KeyError) and is sorted; "back" lists the
    entries linking to this one, also in sorted order because the entries are
    processed in sorted order.
    """
    entries = {}
    for name in parsed:
        entries[name] = {
            "back": [],
            "forward": [],
            "term": "",
        }
    for name in sorted(parsed):
        term, links = parsed[name]
        record = entries[name]
        record["term"] = term
        record["forward"] = sorted(
            target for target in links if target in entries
        )
        for target in record["forward"]:
            entries[target]["back"].append(name)
    return entries


def render_index(entries):
    """Return the text written to ``_index.html``.

    Per entry, in sorted name order: the term, a "> " line with the terms of
    its forward links, a "< " line with the terms of its back links, and an
    empty line.
    """
    # NOTE(review): the original line-ending literals arrived as raw line
    # breaks inside the quotes; they are written as "\n" here.  Confirm
    # whether HTML markup (for example a <br> tag) was intended instead.
    chunks = []
    for name in sorted(entries):
        record = entries[name]
        forward_terms = [entries[key]["term"] for key in sorted(record["forward"])]
        back_terms = [entries[key]["term"] for key in sorted(record["back"])]
        chunks.append(record["term"] + "\n")
        chunks.append("> " + ", ".join(forward_terms) + "\n")
        chunks.append("< " + ", ".join(back_terms) + "\n")
        chunks.append("\n")
    return "".join(chunks)


def main(html_dir=path, json_file="../json/dictionary.json"):
    """Scan *html_dir*, write the index and the JSON dump, return the entries.

    Raises:
        ValueError: if a page contains no term heading.
    """
    parsed = {}
    for name, filename in find_pages(html_dir).items():
        with open(filename) as page:
            term, links = parse_page(page.read())
        if term is None:
            raise ValueError("no term heading found in " + filename)
        parsed[name] = (term, links)

    entries = link_entries(parsed)

    with open(os.path.join(html_dir, "_index.html"), "w") as index:
        index.write(render_index(entries))
    with open(json_file, "w") as dump:
        dump.write(simplejson.dumps(entries))
    return entries


if __name__ == "__main__":
    entries = main()