Never use str, only unicode
This commit is contained in:
parent
6a1ecbe21c
commit
87771c6645
1 changed file with 8 additions and 8 deletions
16
ox/imdb.py
16
ox/imdb.py
|
@ -96,7 +96,7 @@ def getMovieInfo(imdbId):
|
||||||
if not html_title:
|
if not html_title:
|
||||||
html_title = soup('title')
|
html_title = soup('title')
|
||||||
if html_title:
|
if html_title:
|
||||||
html_title = str(html_title[0])
|
html_title = unicode(html_title[0])
|
||||||
html_title = html_title.replace('<br />', ' ').replace(' ', ' ')
|
html_title = html_title.replace('<br />', ' ').replace(' ', ' ')
|
||||||
title = decodeHtml(html_title)
|
title = decodeHtml(html_title)
|
||||||
title = stripTags(title)
|
title = stripTags(title)
|
||||||
|
@ -253,7 +253,7 @@ def getMovieTrivia(imdbId):
|
||||||
triviaList = []
|
triviaList = []
|
||||||
for i in soup('ul', {'class': "trivia"}):
|
for i in soup('ul', {'class': "trivia"}):
|
||||||
for t in i('li'):
|
for t in i('li'):
|
||||||
t = str(t).replace('<br />', '').strip()
|
t = unicode(t).replace('<br />', '').strip()
|
||||||
if t.startswith('<li>') and t.endswith('</li>'):
|
if t.startswith('<li>') and t.endswith('</li>'):
|
||||||
t = t[4:-5].strip()
|
t = t[4:-5].strip()
|
||||||
t=decodeHtml(t)
|
t=decodeHtml(t)
|
||||||
|
@ -266,7 +266,7 @@ def getMovieConnections(imdbId):
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
connections = {}
|
connections = {}
|
||||||
content = soup('div', {'id': 'tn15content'})[0]
|
content = soup('div', {'id': 'tn15content'})[0]
|
||||||
blocks = str(content).split('<h5>')[1:]
|
blocks = unicode(content).split('<h5>')[1:]
|
||||||
for c in blocks:
|
for c in blocks:
|
||||||
connection = c.split('</h5>')[0]
|
connection = c.split('</h5>')[0]
|
||||||
cs = BeautifulSoup(c)
|
cs = BeautifulSoup(c)
|
||||||
|
@ -404,7 +404,7 @@ class IMDb:
|
||||||
if not html_title:
|
if not html_title:
|
||||||
html_title = soup('title')
|
html_title = soup('title')
|
||||||
if html_title:
|
if html_title:
|
||||||
html_title = str(html_title[0])
|
html_title = unicode(html_title[0])
|
||||||
html_title = stripTags(html_title)
|
html_title = stripTags(html_title)
|
||||||
year = re.compile('\((\d{4})\)').findall(html_title)
|
year = re.compile('\((\d{4})\)').findall(html_title)
|
||||||
if not year:
|
if not year:
|
||||||
|
@ -446,9 +446,9 @@ class IMDb:
|
||||||
IMDbDict['runtime'] = 0
|
IMDbDict['runtime'] = 0
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
for info in soup('div', {'class': 'info'}):
|
for info in soup('div', {'class': 'info'}):
|
||||||
key = str(info).split('</h5>')[0].split('<h5>')
|
key = unicode(info).split('</h5>')[0].split('<h5>')
|
||||||
if len(key) > 1:
|
if len(key) > 1:
|
||||||
raw_value = str(info).split('</h5>')[1]
|
raw_value = unicode(info).split('</h5>')[1]
|
||||||
key = key[1][:-1].lower().replace(' ', '_')
|
key = key[1][:-1].lower().replace(' ', '_')
|
||||||
if key in keys:
|
if key in keys:
|
||||||
IMDbDict[key] = self.parse_raw_value(key, raw_value)
|
IMDbDict[key] = self.parse_raw_value(key, raw_value)
|
||||||
|
@ -553,7 +553,7 @@ class IMDb:
|
||||||
d = row('td', {'align':'right'})
|
d = row('td', {'align':'right'})
|
||||||
if d:
|
if d:
|
||||||
try:
|
try:
|
||||||
possible_date = stripTags(str(d[0])).strip()
|
possible_date = stripTags(unicode(d[0])).strip()
|
||||||
rdate = time.strptime(possible_date, "%d %B %Y")
|
rdate = time.strptime(possible_date, "%d %B %Y")
|
||||||
rdate = time.strftime('%Y-%m-%d', rdate)
|
rdate = time.strftime('%Y-%m-%d', rdate)
|
||||||
return rdate
|
return rdate
|
||||||
|
@ -568,7 +568,7 @@ class IMDb:
|
||||||
soup = BeautifulSoup(self.getBusiness())
|
soup = BeautifulSoup(self.getBusiness())
|
||||||
business = {'budget': 0, 'gross': 0, 'profit': 0}
|
business = {'budget': 0, 'gross': 0, 'profit': 0}
|
||||||
content = soup('div', {'id': 'tn15content'})[0]
|
content = soup('div', {'id': 'tn15content'})[0]
|
||||||
blocks = str(content).split('<h5>')[1:]
|
blocks = unicode(content).split('<h5>')[1:]
|
||||||
for c in blocks:
|
for c in blocks:
|
||||||
cs = BeautifulSoup(c)
|
cs = BeautifulSoup(c)
|
||||||
line = c.split('</h5>')
|
line = c.split('</h5>')
|
||||||
|
|
Loading…
Reference in a new issue