diff --git a/ox/criterion.py b/ox/criterion.py
index 1d1927f..6daf4d7 100644
--- a/ox/criterion.py
+++ b/ox/criterion.py
@@ -5,22 +5,33 @@ from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags
from oxutils.text import findRe
-def getPosterUrl(title, director):
- # imdb module is currently broken
- imdbId = imdb.getMovieId(title, director)
+def getMovieId(title = '', director = '', imdbId = ''):
+ if not imdbId:
+ imdbId = imdb.getMovieId(title, director)
html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine', timeout = -1)
strings = findRe(html, '
').split('')
strings.pop(0)
for string in strings:
- criterionId = findRe(string, '"release.asp\?id=(.*?)"')
+ id = findRe(string, '"release.asp\?id=(.*?)"')
criterionTitle = findRe(string, 'class="title">(.*?)')
criterionTitle = re.sub('(?<=\\w)
(?=\\w)', ' / ', criterionTitle)
criterionTitle = criterionTitle.replace('
', '')
criterionDirector = stripTags(findRe(string, '.*?(.*?)')).strip()
- # print ('%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector)).encode('utf-8')
if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId:
- return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
+ return id
return ''
+def getMovieData(title = '', director = '', imdbId = ''):
+ if not imdbId:
+ imdbId = imdb.getMovieId(title, director)
+ id = getMovieId(imdbId = imdbId)
+ if id:
+ html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % id)
+ data = {}
+ data['synopsis'] = stripTags(findRe(html, 'Synopsis
(.*?)'))
+ data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % id
+ return data
+ return {}
+
if __name__ == '__main__':
- getPosterUrl('Le mepris', 'Jean-Luc Godard')
\ No newline at end of file
+ print getMovieData('Le mepris', 'Jean-Luc Godard')
\ No newline at end of file