From f666c4f61dd034a83b1dc86392a8b8132cd7520e Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 16 Jun 2007 16:10:34 +0000 Subject: [PATCH] lets try utf-8 --- scrapeit/imdb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index 7d9a376..5f83ad7 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -98,11 +98,12 @@ class IMDb: def getPage(self, forcereload = False): if forcereload or not self.pageSource: - self.pageSource = read_url(self.pageUrl) + self.pageSource = read_url_utf8(self.pageUrl) return self.pageSource def parse_raw_value(self, key, value): if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'): + value = unicode(value, 'utf-8') value = stripTags(value).strip() if key == 'runtime': parsed_value = _getTerm(value, '(.*?) min')