From f386b7dabaca4f145ae5f44b8767b46270685d42 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 22 Jun 2007 15:09:34 +0000 Subject: [PATCH] lets try utf-8 --- scrapeit/imdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index 4e6f1a4..f71bcd6 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -233,7 +233,7 @@ class IMDb: def getCredits(self, forcereload = False): if forcereload or not self.creditsSource: - self.creditsSource = read_url(self.creditsUrl) + self.creditsSource = read_url_utf8(self.creditsUrl) return self.creditsSource def parseCredits(self): @@ -261,14 +261,14 @@ class IMDb: def getPlot(self, forcereload = False): if forcereload or not self.plotSource: - self.plotSource = read_url(self.plotUrl) + self.plotSource = read_url_utf8(self.plotUrl) return self.plotSource def parsePlot(self): soup = BeautifulSoup(self.getPlot()) plot = soup('p', {'class':'plotpar'}) if plot: - plot = str(plot[0]).split('')[0] + plot = unicode(plot[0]).split('')[0] else: plot = u'' plot = stripTags(plot).strip() @@ -277,7 +277,7 @@ class IMDb: def getEpisodes(self, forcereload = False): if forcereload or not self.episodesSource: - self.episodesSource = read_url(self.episodesUrl) + self.episodesSource = read_url_utf8(self.episodesUrl) return self.episodesSource def parseEpisodes(self): @@ -305,7 +305,7 @@ class IMDb: def getKeywords(self, forcereload = False): if forcereload or not self.keywordSource: - self.keywordSource = read_url(self.keywordUrl) + self.keywordSource = read_url_utf8(self.keywordUrl) return self.keywordSource def parseKeywords(self): @@ -318,7 +318,7 @@ class IMDb: def getTrivia(self, forcereload = False): if forcereload or not self.triviaSource: - self.triviaSource = read_url(self.triviaUrl) + self.triviaSource = read_url_utf8(self.triviaUrl) return self.triviaSource def parseTrivia(self): @@ -336,7 +336,7 @@ class IMDb: def getConnections(self, forcereload = False): if forcereload or not self.connectionsSource: - self.connectionsSource = read_url(self.connectionsUrl) + self.connectionsSource = read_url_utf8(self.connectionsUrl) return self.connectionsSource def parseConnections(self): @@ -354,7 +354,7 @@ class IMDb: def getReleaseinfo(self, forcereload = False): if forcereload or not self.releaseinfoSource: - self.releaseinfoSource = read_url(self.releaseinfoUrl) + self.releaseinfoSource = read_url_utf8(self.releaseinfoUrl) return self.releaseinfoSource def parseReleaseinfo(self): @@ -373,7 +373,7 @@ class IMDb: def getBusiness(self, forcereload = False): if forcereload or not self.businessSource: - self.businessSource = read_url(self.businessUrl) + self.businessSource = read_url_utf8(self.businessUrl) return self.businessSource def parseBusiness(self):