import re

import ox.imdb as imdb
from oxutils.cache import getUrl
from oxutils.html import stripTags
from oxutils.text import findRe


def getPosterUrl(title, director):
    """Print every entry found on the Criterion spine-sorted list page.

    The page at ``http://criterion.com/asp/list.asp?sort=spine`` is fetched
    with ``getUrl``, cut into per-entry chunks, and for each chunk the
    Criterion id, title and director are extracted and printed as
    ``id: title (director)``.

    ``title`` and ``director`` are accepted but currently unused: the
    imdb-based matching that would consume them is disabled (kept below as
    comments), so the function always returns an empty string.

    Returns:
        ``''`` in every case while the imdb matching is disabled.
    """
    # imdb module is currently broken, so this lookup stays disabled:
    # imdbId = imdb.getMovieId(title, director)
    html = getUrl('http://criterion.com/asp/list.asp?sort=spine')

    # NOTE(review): several pattern/separator literals in this function look
    # like they lost their HTML markup (tags dropped, line breaks in their
    # place). They are reproduced as found; confirm them against the page's
    # real markup. As written, str.split('') raises ValueError.
    strings = findRe(html, '(.*?)\n').split('')
    # The chunk before the first separator is not an entry; drop it.
    strings.pop(0)

    for string in strings:
        # Was assigned to ``criterionid`` while every use reads
        # ``criterionId``, which raised NameError on the first entry.
        criterionId = findRe(string, r'"release.asp\?id=(.*?)"')
        criterionTitle = findRe(string, 'class="title">(.*?)')
        # A break sitting between two word characters becomes ' / ';
        # any remaining breaks are removed outright.
        criterionTitle = re.sub('(?<=\\w)\n(?=\\w)', ' / ', criterionTitle)
        criterionTitle = criterionTitle.replace('\n', '')
        criterionDirector = stripTags(findRe(string, '.*?(.*?)')).strip()
        # Single parenthesised argument: same output on Python 2 and 3.
        print('%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector))
        # Disabled together with the imdb lookup above:
        # if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId:
        #     return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
    return ''


def test():
    """Placeholder test hook; does nothing and returns None."""
    return


if __name__ == '__main__':
    getPosterUrl('Le mepris', 'Jean-Luc Godard')