adding spiegel.py
This commit is contained in:
parent
567e7e702d
commit
025297a231
1 changed files with 44 additions and 0 deletions
44
ox/spiegel.py
Normal file
44
ox/spiegel.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
import re
|
||||||
|
from time import gmtime, strftime
|
||||||
|
|
||||||
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
from oxutils.cache import getUrl
|
||||||
|
from oxutils.html import stripTags
|
||||||
|
from oxutils.text import findRegexp
|
||||||
|
|
||||||
|
class Spiegel:
    """A single weekly issue of Der Spiegel, addressed by year and week.

    A valid instance exposes ``year``, ``week``, ``coverUrl`` and
    ``contentsUrl``; ``getContents()`` additionally populates ``contents``.

    NOTE(review): when ``year``/``week`` fail validation the constructor
    returns early and none of these attributes are set, so the caller gets
    an "empty" instance rather than an exception. This is the original
    behaviour and is kept for backward compatibility.
    """

    # Selects the anchors of the contents page that link into the e-paper.
    _contentsLinkRegexp = re.compile(r'http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')
    # Captures the value of the SE parameter (used as the page) up to the
    # closing quote of the href attribute in the anchor's markup.
    _pageRegexp = re.compile(r'&SE=(.*?)"')

    def __init__(self, year, week):
        """Validate ``year``/``week`` and build the cover and contents URLs.

        year -- integer, accepted from 1994 up to the current (UTC) year
        week -- integer issue number, starting at 1; the upper bound
                depends on the year (see below)
        """
        # fixme: simply check if cover exists
        # Sample the clock once so year and week cannot straddle a
        # year boundary between two separate gmtime() calls.
        now = gmtime()
        thisYear = int(strftime('%Y', now))
        thisWeek = int(strftime('%W', now))
        if year == thisYear:
            # For the running year, accept up to one week past the
            # current week number.
            lastWeek = thisWeek + 1
        elif year in (1998, 2004):
            # These two years are treated as having 53 issues.
            lastWeek = 53
        else:
            lastWeek = 52
        if not (1994 <= year <= thisYear) or week not in range(1, lastWeek + 1):
            # Invalid issue: leave the instance without attributes.
            return
        # end fixme
        self.year = year
        self.week = week
        self.coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d0%02d0001-312.jpg' % (self.year, self.week, self.year, self.week)
        self.contentsUrl = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (self.year, self.week)

    def getContents(self):
        """Fetch the contents page and return its entries.

        Stores and returns ``self.contents``: a list of dicts with the keys
        ``'title'`` (the anchor's markup with tags stripped and surrounding
        whitespace removed) and ``'page'`` (the SE parameter of the link,
        as a string).

        Anchors whose markup carries no ``&SE=`` parameter are skipped
        instead of aborting the whole listing with an IndexError.
        """
        self.contents = []
        soup = BeautifulSoup(getUrl(self.contentsUrl))
        for link in soup('a', {'href': self._contentsLinkRegexp}):
            markup = str(link)
            pages = self._pageRegexp.findall(markup)
            if not pages:
                continue
            self.contents.append({'title': stripTags(markup).strip(), 'page': pages[0]})
        return self.contents
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: fetch the table of contents of issue 8/2008
    # and dump the parsed entries to stdout.
    issue = Spiegel(2008, 8)
    issue.getContents()
    print(issue.contents)
|
Loading…
Reference in a new issue