better criterion.py
This commit is contained in:
parent
de6cd2408d
commit
f7735d30cd
2 changed files with 8 additions and 8 deletions
|
@ -6,6 +6,10 @@ from oxutils.html import stripTags
|
||||||
from oxutils.text import findRe
|
from oxutils.text import findRe
|
||||||
|
|
||||||
def getPosterUrl(title, director):
|
def getPosterUrl(title, director):
|
||||||
|
# imdb module is currently broken
|
||||||
|
'''
|
||||||
|
imdbId = imdb.getMovieId(title, director)
|
||||||
|
'''
|
||||||
html = getUrl('http://criterion.com/asp/list.asp?sort=spine')
|
html = getUrl('http://criterion.com/asp/list.asp?sort=spine')
|
||||||
strings = findRe(html, '<table cellspacing="0" id="browse-all-table">(.*?)</table>').split('<tr>')
|
strings = findRe(html, '<table cellspacing="0" id="browse-all-table">(.*?)</table>').split('<tr>')
|
||||||
strings.pop(0)
|
strings.pop(0)
|
||||||
|
@ -14,13 +18,10 @@ def getPosterUrl(title, director):
|
||||||
criterionTitle = findRe(string, 'class="title">(.*?)</a>')
|
criterionTitle = findRe(string, 'class="title">(.*?)</a>')
|
||||||
criterionTitle = re.sub('(?<=\\w)<br>(?=\\w)', ' / ', criterionTitle)
|
criterionTitle = re.sub('(?<=\\w)<br>(?=\\w)', ' / ', criterionTitle)
|
||||||
criterionTitle = criterionTitle.replace('<br>', '')
|
criterionTitle = criterionTitle.replace('<br>', '')
|
||||||
criteriondirector = stripTags(findRe(string, '</a>.*?</td>(.*?)</td>')).strip()
|
criterionDirector = stripTags(findRe(string, '</a>.*?</td>(.*?)</td>')).strip()
|
||||||
print '%s %s (%s)' % (criterionId, criterionTitle, criterionDirector)
|
print '%s: %s (%s)' % (criterionId, criterionTitle, criterionDirector)
|
||||||
# imdb module is currently broken
|
|
||||||
'''
|
'''
|
||||||
imdbId = imdb.getMovieId(title, director)
|
if imdb.getMovieId(criterionTitle, criterionDirector) == imdbId:
|
||||||
imdbData = imdb.getMovieData(imdbId)
|
|
||||||
if imdbData['title'] == title and imdbData['director'] == director:
|
|
||||||
return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
|
return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
|
||||||
'''
|
'''
|
||||||
return ''
|
return ''
|
||||||
|
@ -29,4 +30,4 @@ def test():
|
||||||
return
|
return
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
getPosterUrl('Le Mepris', 'Jean-Luc Godard')
|
getPosterUrl('Le mepris', 'Jean-Luc Godard')
|
|
@ -25,7 +25,6 @@ def getMovieId(title, director='', year=''):
|
||||||
query = 'site:imdb.com %s "%s"' % (director, title)
|
query = 'site:imdb.com %s "%s"' % (director, title)
|
||||||
else:
|
else:
|
||||||
query = 'site:imdb.com "%s"' % title
|
query = 'site:imdb.com "%s"' % title
|
||||||
print google.find(query, 3)
|
|
||||||
for (name, url, desc) in google.find(query, 3):
|
for (name, url, desc) in google.find(query, 3):
|
||||||
if url.startswith('http://www.imdb.com/title/tt'):
|
if url.startswith('http://www.imdb.com/title/tt'):
|
||||||
return url[28:35]
|
return url[28:35]
|
||||||
|
|
Loading…
Reference in a new issue