diff --git a/ox/spiegel.py b/ox/spiegel.py index fe0bc72..49481ef 100644 --- a/ox/spiegel.py +++ b/ox/spiegel.py @@ -88,7 +88,7 @@ def formatSubsection(string): return string[:1].upper() + string[1:] def getMagazine(year, week): - coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d0%02d0001-312.jpg' % (year, week, year, week) + coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week) if not oxutils.net.exists(coverUrl): return None url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week) @@ -99,7 +99,15 @@ def getMagazine(year, week): page = int(re.compile('&SE=(.*?)"').findall(item)[0]) title = stripTags(item).strip() contents.append({'title': title, 'page': page}) - return {'contents': contents, 'coverUrl': coverUrl} + pageUrl = {} + pages = page + 2 + for page in range(1, pages + 10): + url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page) + if oxutils.net.exists(url): + pageUrl[page] = url + else: + pageUrl[page] = '' + return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl} def archiveMagazines(): @@ -125,13 +133,13 @@ def archiveMagazines(): dirname = '%s/%d/%02d' % (archivePath, y, w) if not os.path.exists(dirname): os.makedirs(dirname) - filename = '%s/Der Spiegel %d-%02d.json' % (dirname, y, w) + filename = '%s/Der Spiegel %d %02d.json' % (dirname, y, w) if not os.path.exists(filename) or True: data = simplejson.dumps(magazine, ensure_ascii = False) f = open(filename, 'w') f.write(data) f.close() - filename = '%s/Der Spiegel %d-%02d.txt' % (dirname, y, w) + filename = '%s/Der Spiegel %d %02d.txt' % (dirname, y, w) if not os.path.exists(filename) or True: data = [] for item in magazine['contents']: @@ -140,12 +148,21 @@ def archiveMagazines(): f = open(filename, 'w') f.write(data) f.close() - filename = '%s/Der Spiegel %d-%02d.jpg' % (dirname, y, w) + filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w) if not os.path.exists(filename): data = oxutils.cache.getUrl(magazine['coverUrl']) f = open(filename, 'w') f.write(data) f.close() + for page in magazine['pageUrl']: + url = magazine['pageUrl'][page] + if url: + filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page) + if not os.path.exists(filename): + data = oxutils.cache.getUrl(url) + f = open(filename, 'w') + f.write(data) + f.close() def archiveNews():