spiegel.py: get actual pages
parent 1b04735c68
commit dd048aa4f2

1 changed file with 22 additions and 5 deletions
@@ -88,7 +88,7 @@ def formatSubsection(string):
     return string[:1].upper() + string[1:]
 
 def getMagazine(year, week):
-    coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d0%02d0001-312.jpg' % (year, week, year, week)
+    coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
     if not oxutils.net.exists(coverUrl):
         return None
     url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
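The only change in this hunk is the week field of the cover URL. As a quick check (with 2009 and week 7 as placeholder values, not taken from the repository), the old '%d0%02d' spelling and the new '%d%03d' spelling format to the same string for any two-digit week; the new form simply matches the '%03d' used by the page URLs introduced below.

# Placeholder values purely for illustration (year 2009, week 7).
old = 'ROSPANZ%d0%02d0001-312.jpg' % (2009, 7)  # 'ROSPANZ20090070001-312.jpg'
new = 'ROSPANZ%d%03d0001-312.jpg' % (2009, 7)   # 'ROSPANZ20090070001-312.jpg'
assert old == new  # identical output; '%03d' matches the page URL pattern added below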
@@ -99,7 +99,15 @@ def getMagazine(year, week):
         page = int(re.compile('&SE=(.*?)"').findall(item)[0])
         title = stripTags(item).strip()
         contents.append({'title': title, 'page': page})
-    return {'contents': contents, 'coverUrl': coverUrl}
+    pageUrl = {}
+    pages = page + 2
+    for page in range(1, pages + 10):
+        url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
+        if oxutils.net.exists(url):
+            pageUrl[page] = url
+        else:
+            pageUrl[page] = ''
+    return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}
 
 
 def archiveMagazines():
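The new pageUrl code is what the commit title refers to: it estimates the page count from the last table-of-contents entry and then probes consecutive page-image URLs. Below is a minimal standalone sketch of the same idea, not the repository code: a plain urllib HEAD request stands in for oxutils.net.exists (oxutils is the project's own helper library, so that substitution is an assumption), and year, week and last_toc_page are placeholder parameters.

# Standalone sketch of the page probing added to getMagazine (not the repository code).
# urllib stands in for oxutils.net.exists; the values passed in are placeholders.
import urllib.error
import urllib.request

def page_exists(url):
    # Treat any successful HEAD response as "this page image exists".
    request = urllib.request.Request(url, method='HEAD')
    try:
        with urllib.request.urlopen(request) as response:
            return response.status == 200
    except urllib.error.URLError:
        return False

def probe_pages(year, week, last_toc_page):
    # Same idea as the diff: estimate the page count from the last table-of-contents
    # entry, then probe a few pages past the estimate and record which URLs resolve.
    pages = last_toc_page + 2
    page_url = {}
    for page in range(1, pages + 10):
        url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (
            year, week, year, week, page)
        page_url[page] = url if page_exists(url) else ''
    return pages, page_url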
@@ -125,13 +133,13 @@ def archiveMagazines():
             dirname = '%s/%d/%02d' % (archivePath, y, w)
             if not os.path.exists(dirname):
                 os.makedirs(dirname)
-            filename = '%s/Der Spiegel %d-%02d.json' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.json' % (dirname, y, w)
             if not os.path.exists(filename) or True:
                 data = simplejson.dumps(magazine, ensure_ascii = False)
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
-            filename = '%s/Der Spiegel %d-%02d.txt' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.txt' % (dirname, y, w)
             if not os.path.exists(filename) or True:
                 data = []
                 for item in magazine['contents']:
@@ -140,12 +148,21 @@ def archiveMagazines():
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
-            filename = '%s/Der Spiegel %d-%02d.jpg' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
             if not os.path.exists(filename):
                 data = oxutils.cache.getUrl(magazine['coverUrl'])
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
+            for page in magazine['pageUrl']:
+                url = magazine['pageUrl'][page]
+                if url:
+                    filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
+                    if not os.path.exists(filename):
+                        data = oxutils.cache.getUrl(url)
+                        f = open(filename, 'w')
+                        f.write(data)
+                        f.close()
 
 
 def archiveNews():
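archiveMagazines now also saves every resolved page image next to the cover. As a rough standalone sketch of that step, not the repository code, the function below uses urllib in place of oxutils.cache.getUrl (an assumption about that helper, which is presumed to return the response body) and writes the JPEG bytes in binary mode.

# Standalone sketch of the per-page download step in archiveMagazines
# (not the repository code); urllib replaces oxutils.cache.getUrl.
import os
import urllib.request

def save_page_images(page_url, dirname, year, week):
    # page_url maps page numbers to image URLs ('' where no image was found),
    # mirroring the dictionary returned by getMagazine in this commit.
    for page, url in page_url.items():
        if not url:
            continue
        filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, year, week, page)
        if not os.path.exists(filename):
            data = urllib.request.urlopen(url).read()
            with open(filename, 'wb') as f:  # binary mode for JPEG data
                f.write(data)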