fix criterion

This commit is contained in:
j 2010-07-18 20:57:22 +02:00
parent 2050c9deae
commit b3a7546344
2 changed files with 24 additions and 6 deletions

View file

@ -34,17 +34,18 @@ def getData(id):
except: except:
html = ox.cache.getUrl(data["url"]) html = ox.cache.getUrl(data["url"])
data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>") data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>") data["title"] = findRe(html, "<meta property=['\"]og:title['\"] content=['\"](.*?)['\"]")
data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>") data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")
results = re.compile("<p><strong>(.*?)</strong></p>").findall(html) results = findRe(html, '<div class="left_column">(.*?)</div>')
results = re.compile("<li>(.*?)</li>").findall(results)
data["country"] = results[0] data["country"] = results[0]
data["year"] = results[1] data["year"] = results[1]
result = findRe(html, "<div class=\"synopsis contentbox lightgray\">(.*?)</div>") data["synopsis"] = stripTags(findRe(html, "<p><strong>SYNOPSIS:</strong> (.*?)</p>"))
data["synopsis"] = findRe(result, "<p>(.*?)</p>")
result = findRe(html, "<div class=\"editioninfo\">(.*?)</div>") result = findRe(html, "<div class=\"purchase\">(.*?)</div>")
if 'Blu-Ray' in result or 'Essential Art House DVD' in result: if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
result = re.compile("<div class=\"editioninfo\">(.*?)</div>", re.DOTALL).findall(html)[1] result = re.compile("<div class=\"editioninfo\">(.*?)</div>", re.DOTALL).findall(html)[1]
result = findRe(result, "<a href=\"(.*?)\">") result = findRe(result, "<a href=\"(.*?)\"")
if not "/boxsets/" in result: if not "/boxsets/" in result:
data["posters"] = [result] data["posters"] = [result]
else: else:

View file

@ -245,6 +245,23 @@ class ImdbCombined(Imdb):
self.regex = _regex self.regex = _regex
super(ImdbCombined, self).__init__(id, timeout) super(ImdbCombined, self).__init__(id, timeout)
def getMovieId(title, director='', year=''):
    '''
    Look up an IMDb movie id by running a site-restricted Google search.

    The query is 'site:imdb.com "<title>"'; when year is given it is
    appended to the title as "<title> (<year>)" inside the quotes, and when
    director is given it is added unquoted before the quoted title.

    Returns the seven characters that follow 'http://www.imdb.com/title/tt'
    in the first result url starting with that prefix, or '' when none of
    the results match.

    >>> getMovieId('The Matrix')
    '0133093'
    '''
    # Result urls must start with this prefix; the id is read right after it.
    prefix = 'http://www.imdb.com/title/tt'
    if year:
        title = "%s (%s)" % (title, year)
    if director:
        query = 'site:imdb.com %s "%s"' % (director, title)
    else:
        query = 'site:imdb.com "%s"' % title
    # No debug print of the query here: writing to stdout would leak into
    # callers' output and make the doctest above fail.
    # NOTE(review): 5 is presumably the maximum number of results and
    # timeout=-1 presumably a cache setting -- confirm against google.find.
    for (name, url, desc) in google.find(query, 5, timeout=-1):
        if url.startswith(prefix):
            # Same span as the original url[28:35], derived from the prefix.
            return url[len(prefix):len(prefix) + 7]
    return ''
def guess(title, director='', timeout=google.DEFAULT_TIMEOUT): def guess(title, director='', timeout=google.DEFAULT_TIMEOUT):
#FIXME: proper file -> title #FIXME: proper file -> title
title = title.split('-')[0] title = title.split('-')[0]