update aaaarg
This commit is contained in:
parent
2b5c0b7108
commit
7a7a921fdb
1 changed file with 18 additions and 2 deletions
|
@ -21,7 +21,7 @@ def readUrlUnicode(url, timeout=cache.cache_timeout):
|
||||||
|
|
||||||
def downloadText(id, filename=None):
|
def downloadText(id, filename=None):
|
||||||
#FIXME, what about the cache, this keeps all pdfs in oxcache...
|
#FIXME, what about the cache, this keeps all pdfs in oxcache...
|
||||||
url='http://a.aaaarg.org/node/%d/download' % id
|
url='http://aaaaarg.org/node/%d/download' % id
|
||||||
data = readUrl(url, timeout=-1)
|
data = readUrl(url, timeout=-1)
|
||||||
headers = cache.getHeaders(url, timeout=-1)
|
headers = cache.getHeaders(url, timeout=-1)
|
||||||
if filename:
|
if filename:
|
||||||
|
@ -32,7 +32,7 @@ def downloadText(id, filename=None):
|
||||||
|
|
||||||
def getTextByLetter(letter):
|
def getTextByLetter(letter):
|
||||||
texts = []
|
texts = []
|
||||||
url = 'http://a.aaaarg.org/library/%s' % letter
|
url = 'http://aaaaarg.org/library/%s' % letter
|
||||||
data = readUrlUnicode(url)
|
data = readUrlUnicode(url)
|
||||||
txts = re.compile('<li class="author">(.*?)</li><li class="title"><a href="(.*?)">(.*?)</a></li>').findall(data)
|
txts = re.compile('<li class="author">(.*?)</li><li class="title"><a href="(.*?)">(.*?)</a></li>').findall(data)
|
||||||
author = 'Unknown Author'
|
author = 'Unknown Author'
|
||||||
|
@ -53,9 +53,25 @@ def getTextByLetter(letter):
|
||||||
})
|
})
|
||||||
return texts
|
return texts
|
||||||
|
|
||||||
|
def getData(id):
    """Scrape the aaaaarg.org node page for ``id`` and return its metadata.

    The page at ``http://aaaaarg.org/node/<id>`` is fetched with
    ``readUrlUnicode`` and searched with regular expressions.

    Returns a dict with the keys:
        'aaaaarg': the ``id`` that was passed in, unchanged
        'links':   list of the strings captured after ``http://anonym.to/?``
                   in every ``<a ... class="link-to-text">`` anchor
        'title':   result of ``findRe`` for the ``<h2>`` element
        'author':  result of ``findRe`` for the "written by" author div
    """
    url = "http://aaaaarg.org/node/%s" % id
    data = readUrlUnicode(url)

    # NOTE(review): findRe is defined elsewhere; presumably it returns the
    # first captured group (or an empty value when nothing matches) - confirm.
    title = findRe(data, '<h2>(.*?)</h2>')
    author = findRe(data, '<div class="author"><em>written by (.*?)</em></div>')

    # Raw string: '\?' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions; the pattern is unchanged.
    link_pattern = re.compile(
        r'<a href="http://anonym.to/\?(.*?)" class="link-to-text">')
    links = link_pattern.findall(data)

    return {
        'aaaaarg': id,
        'links': links,
        'title': title,
        'author': author
    }
|
||||||
|
|
||||||
def getTexts():
    """Collect the entries of every library listing into one list.

    Calls ``getTextByLetter`` once for each lowercase ASCII letter
    ('a' through 'z') and once more for the listing named 'date', and
    concatenates the results in that order.

    Returns the combined list.
    """
    texts = []
    # string.ascii_lowercase is locale-independent; string.letters[:26]
    # depends on the active locale on Python 2 and is missing on Python 3.
    for letter in string.ascii_lowercase:
        texts += getTextByLetter(letter)
    # NOTE(review): 'date' is presumably a date-ordered listing that picks up
    # entries not reachable through the letter pages - confirm against the site.
    texts += getTextByLetter('date')
    return texts
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue