diff --git a/ox/imdb.py b/ox/imdb.py index a603acb..1be501a 100644 --- a/ox/imdb.py +++ b/ox/imdb.py @@ -96,7 +96,7 @@ def getMovieInfo(imdbId): if not html_title: html_title = soup('title') if html_title: - html_title = str(html_title[0]) + html_title = unicode(html_title[0]) html_title = html_title.replace('
', ' ').replace(' ', ' ') title = decodeHtml(html_title) title = stripTags(title) @@ -253,7 +253,7 @@ def getMovieTrivia(imdbId): triviaList = [] for i in soup('ul', {'class': "trivia"}): for t in i('li'): - t = str(t).replace('
', '').strip() + t = unicode(t).replace('
', '').strip() if t.startswith('
  • ') and t.endswith('
  • '): t = t[4:-5].strip() t=decodeHtml(t) @@ -266,7 +266,7 @@ def getMovieConnections(imdbId): soup = BeautifulSoup(data) connections = {} content = soup('div', {'id': 'tn15content'})[0] - blocks = str(content).split('
    ')[1:] + blocks = unicode(content).split('
    ')[1:] for c in blocks: connection = c.split('
    ')[0] cs = BeautifulSoup(c) @@ -404,7 +404,7 @@ class IMDb: if not html_title: html_title = soup('title') if html_title: - html_title = str(html_title[0]) + html_title = unicode(html_title[0]) html_title = stripTags(html_title) year = re.compile('\((\d{4})\)').findall(html_title) if not year: @@ -446,9 +446,9 @@ class IMDb: IMDbDict['runtime'] = 0 soup = BeautifulSoup(data) for info in soup('div', {'class': 'info'}): - key = str(info).split('')[0].split('
    ') + key = unicode(info).split('
    ')[0].split('
    ') if len(key) > 1: - raw_value = str(info).split('
    ')[1] + raw_value = unicode(info).split('')[1] key = key[1][:-1].lower().replace(' ', '_') if key in keys: IMDbDict[key] = self.parse_raw_value(key, raw_value) @@ -553,7 +553,7 @@ class IMDb: d = row('td', {'align':'right'}) if d: try: - possible_date = stripTags(str(d[0])).strip() + possible_date = stripTags(unicode(d[0])).strip() rdate = time.strptime(possible_date, "%d %B %Y") rdate = time.strftime('%Y-%m-%d', rdate) return rdate @@ -568,7 +568,7 @@ class IMDb: soup = BeautifulSoup(self.getBusiness()) business = {'budget': 0, 'gross': 0, 'profit': 0} content = soup('div', {'id': 'tn15content'})[0] - blocks = str(content).split('
    ')[1:] + blocks = unicode(content).split('
    ')[1:] for c in blocks: cs = BeautifulSoup(c) line = c.split('
    ')