remove BeautifulSoup from spiegel

This commit is contained in:
j 2010-01-23 10:01:23 +11:00
parent ac06e3feb0
commit 746b355c50

View file

@@ -4,8 +4,6 @@ from datetime import datetime
 import re
 import time
-from BeautifulSoup import BeautifulSoup
 import oxlib.cache
 from oxlib.html import decodeHtml, stripTags
 import oxlib.net
@@ -106,9 +104,10 @@ def getIssue(year, week):
 return None
 url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
 contents = []
-soup = BeautifulSoup(oxlib.cache.readUrl(url))
-for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
-item = str(item)
+data = oxlib.cache.readUrl(url)
+items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
+for item in items:
+item = item[1]
 page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
 title = stripTags(item).strip()
 contents.append({'title': title, 'page': page})