remove BeautifulSoup from spiegel
This commit is contained in:
parent
ac06e3feb0
commit
746b355c50
1 changed file with 4 additions and 5 deletions
|
@@ -4,8 +4,6 @@ from datetime import datetime
|
|||
import re
|
||||
import time
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
import oxlib.cache
|
||||
from oxlib.html import decodeHtml, stripTags
|
||||
import oxlib.net
|
||||
|
@@ -106,9 +104,10 @@ def getIssue(year, week):
|
|||
return None
|
||||
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
||||
contents = []
|
||||
soup = BeautifulSoup(oxlib.cache.readUrl(url))
|
||||
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
||||
item = str(item)
|
||||
data = oxlib.cache.readUrl(url)
|
||||
items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
|
||||
for item in items:
|
||||
item = item[1]
|
||||
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
||||
title = stripTags(item).strip()
|
||||
contents.append({'title': title, 'page': page})
|
||||
|
|
Loading…
Reference in a new issue