spiegel.py: get actual pages
parent 1b04735c68
commit dd048aa4f2
1 changed file with 22 additions and 5 deletions
spiegel.py
@@ -88,7 +88,7 @@ def formatSubsection(string):
     return string[:1].upper() + string[1:]

 def getMagazine(year, week):
-    coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d0%02d0001-312.jpg' % (year, week, year, week)
+    coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
     if not oxutils.net.exists(coverUrl):
         return None
     url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
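For week numbers below 100 the old and the new cover URL formats produce the same file name: '%d0%02d' (year, a literal zero, a two-digit week) and '%d%03d' (year, a three-digit week) both end up with the year followed by three week digits. The change mainly brings the cover URL in line with the page URL pattern added below, as a quick check with arbitrary example values shows:

    >>> 'ROSPANZ%d0%02d0001-312.jpg' % (2008, 5)
    'ROSPANZ20080050001-312.jpg'
    >>> 'ROSPANZ%d%03d0001-312.jpg' % (2008, 5)
    'ROSPANZ20080050001-312.jpg'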
@@ -99,7 +99,15 @@ def getMagazine(year, week):
         page = int(re.compile('&SE=(.*?)"').findall(item)[0])
         title = stripTags(item).strip()
         contents.append({'title': title, 'page': page})
-    return {'contents': contents, 'coverUrl': coverUrl}
+    pageUrl = {}
+    pages = page + 2
+    for page in range(1, pages + 10):
+        url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
+        if oxutils.net.exists(url):
+            pageUrl[page] = url
+        else:
+            pageUrl[page] = ''
+    return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}


 def archiveMagazines():
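The new block estimates the page count from the last table-of-contents entry plus two, then probes up to ten pages past that estimate and records the URL of every page scan that actually exists (an empty string marks a missing page). oxutils.net.exists itself is not part of this diff; a minimal stand-in, assuming it only has to report whether the image URL answers with a success status, could look like this (Python 2, matching the rest of the file):

    import urllib2

    def url_exists(url):
        # stand-in for oxutils.net.exists: send a HEAD request and
        # treat any 2xx status as "this page image is available"
        request = urllib2.Request(url)
        request.get_method = lambda: 'HEAD'
        try:
            response = urllib2.urlopen(request)
            return 200 <= response.code < 300
        except urllib2.URLError:
            return False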
@@ -125,13 +133,13 @@ def archiveMagazines():
             dirname = '%s/%d/%02d' % (archivePath, y, w)
             if not os.path.exists(dirname):
                 os.makedirs(dirname)
-            filename = '%s/Der Spiegel %d-%02d.json' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.json' % (dirname, y, w)
             if not os.path.exists(filename) or True:
                 data = simplejson.dumps(magazine, ensure_ascii = False)
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
-            filename = '%s/Der Spiegel %d-%02d.txt' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.txt' % (dirname, y, w)
             if not os.path.exists(filename) or True:
                 data = []
                 for item in magazine['contents']:
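The archive file names switch from a hyphen to a space between year and week; %02d still zero-pads the week, so with example values y=2008, w=5 and an arbitrary dirname the new pattern gives:

    >>> '%s/Der Spiegel %d %02d.json' % ('/archive/2008/05', 2008, 5)
    '/archive/2008/05/Der Spiegel 2008 05.json'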
@@ -140,12 +148,21 @@ def archiveMagazines():
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
-            filename = '%s/Der Spiegel %d-%02d.jpg' % (dirname, y, w)
+            filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
             if not os.path.exists(filename):
                 data = oxutils.cache.getUrl(magazine['coverUrl'])
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
+            for page in magazine['pageUrl']:
+                url = magazine['pageUrl'][page]
+                if url:
+                    filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
+                    if not os.path.exists(filename):
+                        data = oxutils.cache.getUrl(url)
+                        f = open(filename, 'w')
+                        f.write(data)
+                        f.close()


 def archiveNews():
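The added loop walks magazine['pageUrl'], skips pages whose probe came back empty, and stores each page scan as 'Der Spiegel <year> <week> <page>.jpg' next to the cover. One detail worth noting: the JPEG data is written through open(filename, 'w'), i.e. in text mode; on platforms that translate line endings this would corrupt the image bytes, so a small binary-mode helper is the safer pattern (a sketch, not part of this commit):

    def saveBinary(filename, data):
        # hypothetical helper: write downloaded bytes in binary mode so
        # image data is never altered by newline translation
        f = open(filename, 'wb')
        try:
            f.write(data)
        finally:
            f.close()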