From 9578097c4556d72d790ac2ebf06e63beec573e75 Mon Sep 17 00:00:00 2001 From: Rolux Date: Fri, 9 May 2008 12:39:20 +0200 Subject: [PATCH] even better criterion.py --- ox/criterion.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ox/criterion.py b/ox/criterion.py index 1d1927f..6daf4d7 100644 --- a/ox/criterion.py +++ b/ox/criterion.py @@ -5,22 +5,33 @@ from oxutils.cache import getUrlUnicode from oxutils.html import stripTags from oxutils.text import findRe -def getPosterUrl(title, director): - # imdb module is currently broken - imdbId = imdb.getMovieId(title, director) +def getMovieId(title = '', director = '', imdbId = ''): + if not imdbId: + imdbId = imdb.getMovieId(title, director) html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine', timeout = -1) strings = findRe(html, '(.*?)
').split('') strings.pop(0) for string in strings: - criterionId = findRe(string, '"release.asp\?id=(.*?)"') + id = findRe(string, '"release.asp\?id=(.*?)"') criterionTitle = findRe(string, 'class="title">(.*?)') criterionTitle = re.sub('(?<=\\w)
(?=\\w)', ' / ', criterionTitle) criterionTitle = criterionTitle.replace('
', '') criterionDirector = stripTags(findRe(string, '.*?(.*?)')).strip() - # print ('%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector)).encode('utf-8') if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId: - return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId + return id return '' +def getMovieData(title = '', director = '', imdbId = ''): + if not imdbId: + imdbId = imdb.getMovieId(title, director) + id = getMovieId(imdbId = imdbId) + if id: + html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % id) + data = {} + data['synopsis'] = stripTags(findRe(html, '

Synopsis

(.*?)')) + data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % id + return data + return {} + if __name__ == '__main__': - getPosterUrl('Le mepris', 'Jean-Luc Godard') \ No newline at end of file + print getMovieData('Le mepris', 'Jean-Luc Godard') \ No newline at end of file