remove BeautifulSoup from spiegel

This commit is contained in:
j 2010-01-23 10:01:23 +11:00
parent ac06e3feb0
commit 746b355c50
1 changed file with 4 additions and 5 deletions

View File

@@ -4,8 +4,6 @@ from datetime import datetime
import re
import time
from BeautifulSoup import BeautifulSoup
import oxlib.cache
from oxlib.html import decodeHtml, stripTags
import oxlib.net
@@ -106,9 +104,10 @@ def getIssue(year, week):
return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = []
soup = BeautifulSoup(oxlib.cache.readUrl(url))
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
item = str(item)
data = oxlib.cache.readUrl(url)
items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
for item in items:
item = item[1]
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
title = stripTags(item).strip()
contents.append({'title': title, 'page': page})