less BeautifulSoup
This commit is contained in:
parent
93bd2e72e9
commit
54492f33a6
1 changed file with 26 additions and 38 deletions
|
@@ -187,18 +187,21 @@ def getMovieInfo(imdbId):
|
||||||
|
|
||||||
def getMovieRuntimeSeconds(imdbId):
|
def getMovieRuntimeSeconds(imdbId):
|
||||||
info = getMovieInfo(imdbId)
|
info = getMovieInfo(imdbId)
|
||||||
value = info['runtime'][0]
|
if 'runtime' in info:
|
||||||
parsed_value = findRe(value, '(.*?) min')
|
value = info['runtime'][0]
|
||||||
parsed_value = findRe(parsed_value, '([0-9]+)')
|
parsed_value = findRe(value, '(.*?) min')
|
||||||
if not parsed_value:
|
|
||||||
parsed_value = findRe(value, '(.*?) sec')
|
|
||||||
parsed_value = findRe(parsed_value, '([0-9]+)')
|
parsed_value = findRe(parsed_value, '([0-9]+)')
|
||||||
if not parsed_value:
|
if not parsed_value:
|
||||||
parsed_value = 0
|
parsed_value = findRe(value, '(.*?) sec')
|
||||||
|
parsed_value = findRe(parsed_value, '([0-9]+)')
|
||||||
|
if not parsed_value:
|
||||||
|
parsed_value = 0
|
||||||
|
else:
|
||||||
|
parsed_value = int(parsed_value)
|
||||||
else:
|
else:
|
||||||
parsed_value = int(parsed_value)
|
parsed_value = int(parsed_value) * 60
|
||||||
else:
|
else:
|
||||||
parsed_value = int(parsed_value) * 60
|
parsed_value = -1
|
||||||
return parsed_value
|
return parsed_value
|
||||||
|
|
||||||
def getMoviePoster(imdbId):
|
def getMoviePoster(imdbId):
|
||||||
|
@@ -325,10 +328,7 @@ def getMovieCompanyCredits(imdbId):
|
||||||
def getMovieLocations(imdbId):
|
def getMovieLocations(imdbId):
|
||||||
url = "%slocations" % getUrlBase(imdbId)
|
url = "%slocations" % getUrlBase(imdbId)
|
||||||
data = getUrlUnicode(url)
|
data = getUrlUnicode(url)
|
||||||
soup = BeautifulSoup(data)
|
locations = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>').findall(data)
|
||||||
locations = []
|
|
||||||
for key in soup('a', {'href': re.compile('^/List')}):
|
|
||||||
locations.append(decodeHtml(key.string))
|
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
|
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
|
||||||
|
@@ -360,16 +360,14 @@ def getMoviePosters(imdbId):
|
||||||
def getMovieTrivia(imdbId):
|
def getMovieTrivia(imdbId):
|
||||||
url = "%strivia" % getUrlBase(imdbId)
|
url = "%strivia" % getUrlBase(imdbId)
|
||||||
data = getUrlUnicode(url)
|
data = getUrlUnicode(url)
|
||||||
soup = BeautifulSoup(data)
|
trivia = re.compile('<li>(.*?)</li>').findall(data)
|
||||||
trivia = []
|
def clean(t):
|
||||||
triviaList = []
|
t = decodeHtml(t)
|
||||||
for i in soup('ul', {'class': "trivia"}):
|
t = t.replace(u'', '"')
|
||||||
for t in i('li'):
|
if t.endswith('<br><br>'):
|
||||||
t = unicode(t).replace('<br />', '').strip()
|
t = t[:-8]
|
||||||
if t.startswith('<li>') and t.endswith('</li>'):
|
return t.strip()
|
||||||
t = t[4:-5].strip()
|
trivia = [clean(t) for t in trivia]
|
||||||
t=decodeHtml(t)
|
|
||||||
trivia.append(t)
|
|
||||||
return trivia
|
return trivia
|
||||||
|
|
||||||
def getMovieConnections(imdbId):
|
def getMovieConnections(imdbId):
|
||||||
|
@@ -393,21 +391,11 @@ def getMovieKeywords(imdbId):
|
||||||
def getMovieExternalReviews(imdbId):
|
def getMovieExternalReviews(imdbId):
|
||||||
url = "%sexternalreviews" % getUrlBase(imdbId)
|
url = "%sexternalreviews" % getUrlBase(imdbId)
|
||||||
data = getUrlUnicode(url)
|
data = getUrlUnicode(url)
|
||||||
soup = BeautifulSoup(data)
|
_reviews = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(data)
|
||||||
ol = soup('ol')
|
reviews = {}
|
||||||
if ol:
|
for r in _reviews:
|
||||||
ol = ol[0]
|
reviews[r[0]] = r[1]
|
||||||
ret = {}
|
return reviews
|
||||||
for li in ol('li'):
|
|
||||||
try:
|
|
||||||
a = li('a')[0]
|
|
||||||
href = a.get('href')
|
|
||||||
txt = a.contents[0]
|
|
||||||
ret[href] = txt
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ret
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def getMovieReleaseDate(imdbId):
|
def getMovieReleaseDate(imdbId):
|
||||||
releasedates = getMovieReleaseDates(imdbId)
|
releasedates = getMovieReleaseDates(imdbId)
|
||||||
|
|
Loading…
Reference in a new issue