From 746b355c5048ef45e2d5cdcbe68dd183fd7b240e Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 23 Jan 2010 10:01:23 +1100 Subject: [PATCH] remove BeautifulSoup from spiegel --- oxweb/spiegel.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/oxweb/spiegel.py b/oxweb/spiegel.py index 403b9f6..129ac62 100644 --- a/oxweb/spiegel.py +++ b/oxweb/spiegel.py @@ -4,8 +4,6 @@ from datetime import datetime import re import time -from BeautifulSoup import BeautifulSoup - import oxlib.cache from oxlib.html import decodeHtml, stripTags import oxlib.net @@ -106,9 +104,10 @@ def getIssue(year, week): return None url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week) contents = [] - soup = BeautifulSoup(oxlib.cache.readUrl(url)) - for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}): - item = str(item) + data = oxlib.cache.readUrl(url) + items = re.compile('(.*?)').findall(data) + for item in items: + item = item[1] page = int(re.compile('&SE=(.*?)"').findall(item)[0]) title = stripTags(item).strip() contents.append({'title': title, 'page': page})