better criterion.py

This commit is contained in:
Rolux 2008-05-09 12:21:59 +02:00
parent 8a124fca9d
commit 252f9c371c

View file

@@ -1,33 +1,26 @@
import re import re
import ox.imdb as imdb import ox.imdb as imdb
from oxutils.cache import getUrl from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags from oxutils.html import stripTags
from oxutils.text import findRe from oxutils.text import findRe
def getPosterUrl(title, director):
    """Return the Criterion box-shot image URL for a film, or '' if none matches.

    The film is resolved to an id with ``imdb.getMovieId(title, director)``.
    Every row of Criterion's spine-sorted listing page is then resolved the
    same way from its scraped title and director, and the first row whose id
    equals the film's id supplies the Criterion release id used in the URL.

    title    -- film title, passed straight to ``imdb.getMovieId``
    director -- director name, passed straight to ``imdb.getMovieId``

    Returns the image URL as a string, or the empty string when no listing
    row resolves to the same id.
    """
    # NOTE(review): an earlier revision carried the remark "imdb module is
    # currently broken" and disabled the matching below -- confirm that
    # imdb.getMovieId is reliable again before depending on this function.
    imdbId = imdb.getMovieId(title, director)
    if not imdbId:
        # Presumably getMovieId yields an empty value when it finds nothing
        # (TODO confirm). Comparing two empty ids would "match" the first
        # listing row that also fails to resolve, so give up early instead.
        return ''

    # NOTE(review): timeout=-1 presumably tells the oxutils cache never to
    # expire the page -- confirm against oxutils.cache.
    html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine', timeout=-1)
    table = findRe(html, '<table cellspacing="0" id="browse-all-table">(.*?)</table>')

    # The chunk before the first <tr> is table preamble, not a film row.
    for row in table.split('<tr>')[1:]:
        criterionId = findRe(row, r'"release.asp\?id=(.*?)"')
        criterionTitle = findRe(row, 'class="title">(.*?)</a>')
        # A <br> sandwiched between two word characters becomes ' / ';
        # every remaining <br> is simply dropped.
        criterionTitle = re.sub(r'(?<=\w)<br>(?=\w)', ' / ', criterionTitle)
        criterionTitle = criterionTitle.replace('<br>', '')
        criterionDirector = stripTags(findRe(row, '</a>.*?</td>(.*?)</td>')).strip()
        if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId:
            return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
    return ''
def test():
    """Placeholder test hook; does nothing and returns None."""
    return
# Manual smoke run: look up one known Criterion title when executed as a script.
# The returned URL is not printed or stored.
if __name__ == '__main__':
    getPosterUrl('Le mepris', 'Jean-Luc Godard')