less BeautifulSoup
This commit is contained in:
parent
93bd2e72e9
commit
54492f33a6
1 changed files with 26 additions and 38 deletions
|
@ -187,6 +187,7 @@ def getMovieInfo(imdbId):
|
|||
|
||||
def getMovieRuntimeSeconds(imdbId):
|
||||
info = getMovieInfo(imdbId)
|
||||
if 'runtime' in info:
|
||||
value = info['runtime'][0]
|
||||
parsed_value = findRe(value, '(.*?) min')
|
||||
parsed_value = findRe(parsed_value, '([0-9]+)')
|
||||
|
@ -199,6 +200,8 @@ def getMovieRuntimeSeconds(imdbId):
|
|||
parsed_value = int(parsed_value)
|
||||
else:
|
||||
parsed_value = int(parsed_value) * 60
|
||||
else:
|
||||
parsed_value = -1
|
||||
return parsed_value
|
||||
|
||||
def getMoviePoster(imdbId):
|
||||
|
@ -325,10 +328,7 @@ def getMovieCompanyCredits(imdbId):
|
|||
def getMovieLocations(imdbId):
    """Return the location names listed on a title's "locations" page.

    The page text is obtained by passing ``<url base>locations`` to
    getUrlUnicode. Every ``<dt><a href="/List...">NAME</a></dt>`` entry
    contributes NAME, in document order. The result is an empty list when
    nothing matches.
    """
    url = "%slocations" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    # NOTE(review): the BeautifulSoup implementation this replaces passed
    # each name through decodeHtml(); the regex captures are returned as
    # they appear in the markup. Confirm callers expect undecoded text.
    return re.findall('<dt><a href="/List.*?>(.*?)</a></dt>', data)
|
||||
|
||||
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
|
||||
|
@ -360,16 +360,14 @@ def getMoviePosters(imdbId):
|
|||
def getMovieTrivia(imdbId):
    """Return the cleaned trivia entries from a title's "trivia" page.

    The page text is obtained by passing ``<url base>trivia`` to
    getUrlUnicode. Each ``<li>...</li>`` match on a single line becomes one
    entry, which is HTML-decoded, has typographic double quotes replaced by
    a plain '"', loses a trailing ``<br><br>`` and is stripped of
    surrounding whitespace.
    """
    url = "%strivia" % getUrlBase(imdbId)
    data = getUrlUnicode(url)

    def clean(t):
        """Normalise one raw ``<li>`` capture into display text."""
        t = decodeHtml(t)
        # NOTE(review): the character being replaced was lost from the
        # visible source, which leaves replace(u'', '"') -- that would
        # insert a quote between every character. It is assumed to have been
        # a typographic double quote (cp1252 0x93/0x94 or U+201C/U+201D);
        # confirm against the repository copy.
        for quote in (u'\x93', u'\x94', u'\u201c', u'\u201d'):
            t = t.replace(quote, '"')
        if t.endswith('<br><br>'):
            t = t[:-8]
        return t.strip()

    return [clean(t) for t in re.findall('<li>(.*?)</li>', data)]
|
||||
|
||||
def getMovieConnections(imdbId):
|
||||
|
@ -393,21 +391,11 @@ def getMovieKeywords(imdbId):
|
|||
def getMovieExternalReviews(imdbId):
    """Return external review links from a title's "externalreviews" page.

    The page text is obtained by passing ``<url base>externalreviews`` to
    getUrlUnicode. Every ``<li><a href="URL">TEXT</a></li>`` entry is
    collected into a dict mapping URL to TEXT. If a URL occurs more than
    once, the last TEXT wins. The result is an empty dict when nothing
    matches.
    """
    url = "%sexternalreviews" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    # findall yields (href, text) pairs, which dict() consumes directly.
    return dict(re.findall('<li><a href="(.*?)">(.*?)</a></li>', data))
|
||||
|
||||
def getMovieReleaseDate(imdbId):
|
||||
releasedates = getMovieReleaseDates(imdbId)
|
||||
|
|
Loading…
Reference in a new issue