even better criterion.py
This commit is contained in:
parent
252f9c371c
commit
9578097c45
1 changed files with 18 additions and 7 deletions
|
@@ -5,22 +5,33 @@ from oxutils.cache import getUrlUnicode
|
|||
from oxutils.html import stripTags
|
||||
from oxutils.text import findRe
|
||||
|
||||
def getMovieId(title='', director='', imdbId=''):
    """Return the Criterion release id matching a film, or '' if none does.

    The film is identified by its IMDb id. When ``imdbId`` is not given it
    is looked up from ``title`` and ``director`` via ``imdb.getMovieId``.
    Each row of Criterion's spine-sorted release list is then resolved to
    an IMDb id the same way and compared against it; the release id of the
    first matching row is returned.

    Note that this performs one ``imdb.getMovieId`` lookup per listed
    release until a match is found.
    """
    if not imdbId:
        imdbId = imdb.getMovieId(title, director)
    if not imdbId:
        # Nothing to compare against. Without this guard a row whose own
        # lookup comes back equally empty could be reported as a match.
        return ''
    # NOTE(review): timeout=-1 presumably controls cache expiry in
    # oxutils.cache -- confirm its exact meaning there.
    html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine', timeout=-1)
    table = findRe(html, '<table cellspacing="0" id="browse-all-table">(.*?)</table>')
    # The text before the first <tr> is not a release row, so drop it.
    rows = table.split('<tr>')[1:]
    for row in rows:
        criterionId = findRe(row, r'"release.asp\?id=(.*?)"')
        criterionTitle = findRe(row, 'class="title">(.*?)</a>')
        # A <br> between two word characters separates alternative titles;
        # any other <br> is dropped.
        criterionTitle = re.sub(r'(?<=\w)<br>(?=\w)', ' / ', criterionTitle)
        criterionTitle = criterionTitle.replace('<br>', '')
        criterionDirector = stripTags(findRe(row, '</a>.*?</td>(.*?)</td>')).strip()
        if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId:
            return criterionId
    return ''
|
||||
|
||||
def getMovieData(title='', director='', imdbId=''):
    """Return Criterion data for a film as a dict, or {} if it has no release.

    When ``imdbId`` is not given it is looked up from ``title`` and
    ``director`` via ``imdb.getMovieId``. The Criterion release id is then
    obtained from ``getMovieId``. If one is found, the release page is
    fetched and the result holds two keys:

    ``synopsis``  -- text of the page's Synopsis section, tags stripped
    ``posterUrl`` -- URL of the full-size box shot for the release
    """
    if not imdbId:
        imdbId = imdb.getMovieId(title, director)
    criterionId = getMovieId(imdbId=imdbId)
    if not criterionId:
        return {}
    page = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId)
    synopsis = stripTags(findRe(page, '<h3>Synopsis</h3>(.*?)</div>'))
    posterUrl = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
    return {'synopsis': synopsis, 'posterUrl': posterUrl}
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: fetch and print the Criterion data for one film.
    # The parentheses let this line parse both as a Python 2 print
    # statement and as a Python 3 function call.
    print(getMovieData('Le mepris', 'Jean-Luc Godard'))
|
Loading…
Reference in a new issue