remove BeautifulSoup from spiegel
This commit is contained in:
parent
ac06e3feb0
commit
746b355c50
1 changed file with 4 additions and 5 deletions
|
@ -4,8 +4,6 @@ from datetime import datetime
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
|
||||||
|
|
||||||
import oxlib.cache
|
import oxlib.cache
|
||||||
from oxlib.html import decodeHtml, stripTags
|
from oxlib.html import decodeHtml, stripTags
|
||||||
import oxlib.net
|
import oxlib.net
|
||||||
|
@ -106,9 +104,10 @@ def getIssue(year, week):
|
||||||
return None
|
return None
|
||||||
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
||||||
contents = []
|
contents = []
|
||||||
soup = BeautifulSoup(oxlib.cache.readUrl(url))
|
data = oxlib.cache.readUrl(url)
|
||||||
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
|
||||||
item = str(item)
|
for item in items:
|
||||||
|
item = item[1]
|
||||||
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
||||||
title = stripTags(item).strip()
|
title = stripTags(item).strip()
|
||||||
contents.append({'title': title, 'page': page})
|
contents.append({'title': title, 'page': page})
|
||||||
|
|
Loading…
Reference in a new issue