Never use str(); always use unicode()

This commit is contained in:
j 2008-05-23 12:48:19 +02:00
parent 6a1ecbe21c
commit 87771c6645

View file

@ -96,7 +96,7 @@ def getMovieInfo(imdbId):
if not html_title:
html_title = soup('title')
if html_title:
html_title = str(html_title[0])
html_title = unicode(html_title[0])
html_title = html_title.replace('<br />', ' ').replace(' ', ' ')
title = decodeHtml(html_title)
title = stripTags(title)
@ -253,7 +253,7 @@ def getMovieTrivia(imdbId):
triviaList = []
for i in soup('ul', {'class': "trivia"}):
for t in i('li'):
t = str(t).replace('<br />', '').strip()
t = unicode(t).replace('<br />', '').strip()
if t.startswith('<li>') and t.endswith('</li>'):
t = t[4:-5].strip()
t=decodeHtml(t)
@ -266,7 +266,7 @@ def getMovieConnections(imdbId):
soup = BeautifulSoup(data)
connections = {}
content = soup('div', {'id': 'tn15content'})[0]
blocks = str(content).split('<h5>')[1:]
blocks = unicode(content).split('<h5>')[1:]
for c in blocks:
connection = c.split('</h5>')[0]
cs = BeautifulSoup(c)
@ -404,7 +404,7 @@ class IMDb:
if not html_title:
html_title = soup('title')
if html_title:
html_title = str(html_title[0])
html_title = unicode(html_title[0])
html_title = stripTags(html_title)
year = re.compile('\((\d{4})\)').findall(html_title)
if not year:
@ -446,9 +446,9 @@ class IMDb:
IMDbDict['runtime'] = 0
soup = BeautifulSoup(data)
for info in soup('div', {'class': 'info'}):
key = str(info).split('</h5>')[0].split('<h5>')
key = unicode(info).split('</h5>')[0].split('<h5>')
if len(key) > 1:
raw_value = str(info).split('</h5>')[1]
raw_value = unicode(info).split('</h5>')[1]
key = key[1][:-1].lower().replace(' ', '_')
if key in keys:
IMDbDict[key] = self.parse_raw_value(key, raw_value)
@ -553,7 +553,7 @@ class IMDb:
d = row('td', {'align':'right'})
if d:
try:
possible_date = stripTags(str(d[0])).strip()
possible_date = stripTags(unicode(d[0])).strip()
rdate = time.strptime(possible_date, "%d %B %Y")
rdate = time.strftime('%Y-%m-%d', rdate)
return rdate
@ -568,7 +568,7 @@ class IMDb:
soup = BeautifulSoup(self.getBusiness())
business = {'budget': 0, 'gross': 0, 'profit': 0}
content = soup('div', {'id': 'tn15content'})[0]
blocks = str(content).split('<h5>')[1:]
blocks = unicode(content).split('<h5>')[1:]
for c in blocks:
cs = BeautifulSoup(c)
line = c.split('</h5>')