less BeautifulSoup

This commit is contained in:
j 2009-08-06 12:10:57 +02:00
parent 93bd2e72e9
commit 54492f33a6

View file

@ -187,6 +187,7 @@ def getMovieInfo(imdbId):
def getMovieRuntimeSeconds(imdbId):
info = getMovieInfo(imdbId)
if 'runtime' in info:
value = info['runtime'][0]
parsed_value = findRe(value, '(.*?) min')
parsed_value = findRe(parsed_value, '([0-9]+)')
@ -199,6 +200,8 @@ def getMovieRuntimeSeconds(imdbId):
parsed_value = int(parsed_value)
else:
parsed_value = int(parsed_value) * 60
else:
parsed_value = -1
return parsed_value
def getMoviePoster(imdbId):
@ -325,10 +328,7 @@ def getMovieCompanyCredits(imdbId):
def getMovieLocations(imdbId):
url = "%slocations" % getUrlBase(imdbId)
data = getUrlUnicode(url)
soup = BeautifulSoup(data)
locations = []
for key in soup('a', {'href': re.compile('^/List')}):
locations.append(decodeHtml(key.string))
locations = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>').findall(data)
return locations
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
@ -360,16 +360,14 @@ def getMoviePosters(imdbId):
def getMovieTrivia(imdbId):
url = "%strivia" % getUrlBase(imdbId)
data = getUrlUnicode(url)
soup = BeautifulSoup(data)
trivia = []
triviaList = []
for i in soup('ul', {'class': "trivia"}):
for t in i('li'):
t = unicode(t).replace('<br />', '').strip()
if t.startswith('<li>') and t.endswith('</li>'):
t = t[4:-5].strip()
t=decodeHtml(t)
trivia.append(t)
trivia = re.compile('<li>(.*?)</li>').findall(data)
def clean(t):
t = decodeHtml(t)
t = t.replace(u'”', '"')
if t.endswith('<br><br>'):
t = t[:-8]
return t.strip()
trivia = [clean(t) for t in trivia]
return trivia
def getMovieConnections(imdbId):
@ -393,21 +391,11 @@ def getMovieKeywords(imdbId):
def getMovieExternalReviews(imdbId):
url = "%sexternalreviews" % getUrlBase(imdbId)
data = getUrlUnicode(url)
soup = BeautifulSoup(data)
ol = soup('ol')
if ol:
ol = ol[0]
ret = {}
for li in ol('li'):
try:
a = li('a')[0]
href = a.get('href')
txt = a.contents[0]
ret[href] = txt
except:
pass
return ret
return {}
_reviews = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(data)
reviews = {}
for r in _reviews:
reviews[r[0]] = r[1]
return reviews
def getMovieReleaseDate(imdbId):
releasedates = getMovieReleaseDates(imdbId)