From 252f9c371cb35536523e7db9698b4d4b5c2fcbe0 Mon Sep 17 00:00:00 2001 From: Rolux Date: Fri, 9 May 2008 12:21:59 +0200 Subject: [PATCH] better criterion.py --- ox/criterion.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/ox/criterion.py b/ox/criterion.py index 3e6e653..1d1927f 100644 --- a/ox/criterion.py +++ b/ox/criterion.py @@ -1,33 +1,26 @@ import re import ox.imdb as imdb -from oxutils.cache import getUrl +from oxutils.cache import getUrlUnicode from oxutils.html import stripTags from oxutils.text import findRe def getPosterUrl(title, director): # imdb module is currently broken - ''' imdbId = imdb.getMovieId(title, director) - ''' - html = getUrl('http://criterion.com/asp/list.asp?sort=spine') + html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine', timeout = -1) strings = findRe(html, '(.*?)
').split('') strings.pop(0) for string in strings: - criterionid = findRe(string, '"release.asp\?id=(.*?)"') + criterionId = findRe(string, '"release.asp\?id=(.*?)"') criterionTitle = findRe(string, 'class="title">(.*?)') criterionTitle = re.sub('(?<=\\w)
(?=\\w)', ' / ', criterionTitle) criterionTitle = criterionTitle.replace('
', '') criterionDirector = stripTags(findRe(string, '.*?(.*?)')).strip() - print '%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector) - ''' + # print ('%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector)).encode('utf-8') if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId: return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId - ''' return '' -def test(): - return - if __name__ == '__main__': getPosterUrl('Le mepris', 'Jean-Luc Godard') \ No newline at end of file