less BeautifulSoup

This commit is contained in:
j 2009-08-06 12:10:57 +02:00
parent 93bd2e72e9
commit 54492f33a6
1 changed files with 26 additions and 38 deletions

View File

@ -187,18 +187,21 @@ def getMovieInfo(imdbId):
def getMovieRuntimeSeconds(imdbId): def getMovieRuntimeSeconds(imdbId):
info = getMovieInfo(imdbId) info = getMovieInfo(imdbId)
value = info['runtime'][0] if 'runtime' in info:
parsed_value = findRe(value, '(.*?) min') value = info['runtime'][0]
parsed_value = findRe(parsed_value, '([0-9]+)') parsed_value = findRe(value, '(.*?) min')
if not parsed_value:
parsed_value = findRe(value, '(.*?) sec')
parsed_value = findRe(parsed_value, '([0-9]+)') parsed_value = findRe(parsed_value, '([0-9]+)')
if not parsed_value: if not parsed_value:
parsed_value = 0 parsed_value = findRe(value, '(.*?) sec')
parsed_value = findRe(parsed_value, '([0-9]+)')
if not parsed_value:
parsed_value = 0
else:
parsed_value = int(parsed_value)
else: else:
parsed_value = int(parsed_value) parsed_value = int(parsed_value) * 60
else: else:
parsed_value = int(parsed_value) * 60 parsed_value = -1
return parsed_value return parsed_value
def getMoviePoster(imdbId): def getMoviePoster(imdbId):
@ -325,10 +328,7 @@ def getMovieCompanyCredits(imdbId):
def getMovieLocations(imdbId): def getMovieLocations(imdbId):
url = "%slocations" % getUrlBase(imdbId) url = "%slocations" % getUrlBase(imdbId)
data = getUrlUnicode(url) data = getUrlUnicode(url)
soup = BeautifulSoup(data) locations = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>').findall(data)
locations = []
for key in soup('a', {'href': re.compile('^/List')}):
locations.append(decodeHtml(key.string))
return locations return locations
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')): def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
@ -360,17 +360,15 @@ def getMoviePosters(imdbId):
def getMovieTrivia(imdbId): def getMovieTrivia(imdbId):
url = "%strivia" % getUrlBase(imdbId) url = "%strivia" % getUrlBase(imdbId)
data = getUrlUnicode(url) data = getUrlUnicode(url)
soup = BeautifulSoup(data) trivia = re.compile('<li>(.*?)</li>').findall(data)
trivia = [] def clean(t):
triviaList = [] t = decodeHtml(t)
for i in soup('ul', {'class': "trivia"}): t = t.replace(u'”', '"')
for t in i('li'): if t.endswith('<br><br>'):
t = unicode(t).replace('<br />', '').strip() t = t[:-8]
if t.startswith('<li>') and t.endswith('</li>'): return t.strip()
t = t[4:-5].strip() trivia = [clean(t) for t in trivia]
t=decodeHtml(t) return trivia
trivia.append(t)
return trivia
def getMovieConnections(imdbId): def getMovieConnections(imdbId):
url = "%smovieconnections" % getUrlBase(imdbId) url = "%smovieconnections" % getUrlBase(imdbId)
@ -393,21 +391,11 @@ def getMovieKeywords(imdbId):
def getMovieExternalReviews(imdbId): def getMovieExternalReviews(imdbId):
url = "%sexternalreviews" % getUrlBase(imdbId) url = "%sexternalreviews" % getUrlBase(imdbId)
data = getUrlUnicode(url) data = getUrlUnicode(url)
soup = BeautifulSoup(data) _reviews = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(data)
ol = soup('ol') reviews = {}
if ol: for r in _reviews:
ol = ol[0] reviews[r[0]] = r[1]
ret = {} return reviews
for li in ol('li'):
try:
a = li('a')[0]
href = a.get('href')
txt = a.contents[0]
ret[href] = txt
except:
pass
return ret
return {}
def getMovieReleaseDate(imdbId): def getMovieReleaseDate(imdbId):
releasedates = getMovieReleaseDates(imdbId) releasedates = getMovieReleaseDates(imdbId)