diff --git a/oxweb/aaaarg.py b/oxweb/aaaarg.py new file mode 100644 index 0000000..cca6385 --- /dev/null +++ b/oxweb/aaaarg.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +import re +import os +import string + +from oxlib import cache +from oxlib.html import stripTags, decodeHtml +from oxlib.text import findRe +from oxlib.normalize import canonicalName +import auth + + +def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None): + headers = headers.copy() + headers["Cookie"] = auth.get("aaaarg.cookie") + return cache.readUrl(url, data, headers, timeout) + +def readUrlUnicode(url, timeout=cache.cache_timeout): + return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout) + +def downloadText(id, filename=None): + #FIXME, what about the cache, this keeps all pdfs in oxcache... + url='http://a.aaaarg.org/node/%d/download' % id + data = readUrl(url, timeout=-1) + headers = cache.getHeaders(url, timeout=-1) + if filename: + with open(filename, "w") as f: + f.write(data) + return + return data + +def getTextByLetter(letter): + texts = [] + url = 'http://a.aaaarg.org/library/%s' % letter + data = readUrlUnicode(url) + txts = re.compile('