use imdb for search
This commit is contained in:
parent
9fc6425a9e
commit
3d93932200
1 changed files with 33 additions and 63 deletions
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import urllib2
|
import urllib2
|
||||||
from urllib import quote, unquote
|
import urllib
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
@ -298,7 +298,7 @@ class ImdbCombined(Imdb):
|
||||||
self.regex = _regex
|
self.regex = _regex
|
||||||
super(ImdbCombined, self).__init__(id, timeout)
|
super(ImdbCombined, self).__init__(id, timeout)
|
||||||
|
|
||||||
def getMovieId(title, director='', year=''):
|
def getMovieId(title, director='', year='', timeout=-1):
|
||||||
'''
|
'''
|
||||||
>>> getMovieId('The Matrix')
|
>>> getMovieId('The Matrix')
|
||||||
u'0133093'
|
u'0133093'
|
||||||
|
@ -308,16 +308,38 @@ def getMovieId(title, director='', year=''):
|
||||||
|
|
||||||
>>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
|
>>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
|
||||||
u'0060304'
|
u'0060304'
|
||||||
|
|
||||||
|
>>> getMovieId(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
|
||||||
|
u'0179214'
|
||||||
|
|
||||||
|
>>> getMovieId(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
|
||||||
|
u'0179214'
|
||||||
'''
|
'''
|
||||||
|
if isinstance(title, unicode):
|
||||||
|
title = title.encode('utf-8')
|
||||||
|
params = {'s':'tt','q': title}
|
||||||
if director:
|
if director:
|
||||||
query = 'site:imdb.com %s "%s" ' % (director, title)
|
if isinstance(director, unicode):
|
||||||
else:
|
director = director.encode('utf-8')
|
||||||
query = 'site:imdb.com "%s" ' % title
|
params['q'] = '"%s" %s' % (title, director)
|
||||||
if year:
|
if year:
|
||||||
query += year
|
params['q'] = '"%s (%s)" %s' % (title, year, director)
|
||||||
for (name, url, desc) in google.find(query, 5, timeout=-1):
|
params = urllib.urlencode(params)
|
||||||
if url.startswith('http://www.imdb.com/title/tt'):
|
url = "http://akas.imdb.com/find?" + params
|
||||||
return url[28:35]
|
#print url
|
||||||
|
|
||||||
|
data = readUrlUnicode(url, timeout=timeout)
|
||||||
|
#if search results in redirect, get id of current page
|
||||||
|
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
|
||||||
|
results = re.compile(r).findall(data)
|
||||||
|
if results:
|
||||||
|
return results[0]
|
||||||
|
#otherwise get first result
|
||||||
|
r = '<td valign="top">.*?<a href="/title/tt(\d{7})/"'
|
||||||
|
results = re.compile(r).findall(data)
|
||||||
|
if results:
|
||||||
|
return results[0]
|
||||||
|
#or nothing
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getMoviePoster(imdbId):
|
def getMoviePoster(imdbId):
|
||||||
|
@ -338,60 +360,8 @@ def getMoviePoster(imdbId):
|
||||||
return getMoviePoster(info['series'])
|
return getMoviePoster(info['series'])
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def guess(title, director='', timeout=google.DEFAULT_TIMEOUT):
|
def guess(title, director='', timeout=-1):
|
||||||
#FIXME: proper file -> title
|
return getMovieId(title, director, timeout=timeout)
|
||||||
'''
|
|
||||||
//this is not needed
|
|
||||||
title = title.split('-')[0]
|
|
||||||
title = title.split('(')[0]
|
|
||||||
title = title.split('.')[0]
|
|
||||||
title = title.strip()
|
|
||||||
'''
|
|
||||||
static = {
|
|
||||||
(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard'): '0179214',
|
|
||||||
(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard'): '0179214',
|
|
||||||
}.get((title, director), None)
|
|
||||||
if static:
|
|
||||||
return static
|
|
||||||
imdb_url = 'http://www.imdb.com/find?q=%s' % quote(title.encode('utf-8'))
|
|
||||||
return_url = ''
|
|
||||||
|
|
||||||
#let's first try google
|
|
||||||
#i.e. site:imdb.com Michael Stevens "Sin"
|
|
||||||
if director:
|
|
||||||
search = 'site:imdb.com %s "%s"' % (director, title)
|
|
||||||
else:
|
|
||||||
search = 'site:imdb.com "%s"' % title
|
|
||||||
|
|
||||||
for (name, url, desc) in google.find(search, 2, timeout=timeout):
|
|
||||||
if url.startswith('http://www.imdb.com/title/tt'):
|
|
||||||
return normalizeImdbId(int(ox.intValue(url)))
|
|
||||||
|
|
||||||
try:
|
|
||||||
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
|
|
||||||
u = urllib2.urlopen(req)
|
|
||||||
data = u.read()
|
|
||||||
return_url = u.url
|
|
||||||
u.close()
|
|
||||||
except:
|
|
||||||
return None
|
|
||||||
if return_url.startswith('http://www.imdb.com/title/tt'):
|
|
||||||
return return_url[28:35]
|
|
||||||
if data:
|
|
||||||
imdb_id = findRe(data.replace('\n', ' '), 'Popular Results.*?<ol><li>.*?<a href="/title/tt(.......)')
|
|
||||||
if imdb_id:
|
|
||||||
return imdb_id
|
|
||||||
|
|
||||||
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
|
|
||||||
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
|
|
||||||
u = urllib2.urlopen(req)
|
|
||||||
data = u.read()
|
|
||||||
return_url = u.url
|
|
||||||
u.close()
|
|
||||||
if return_url.startswith('http://www.imdb.com/title/tt'):
|
|
||||||
return return_url[28:35]
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import json
|
import json
|
||||||
|
|
Loading…
Reference in a new issue