Fix some tests and URLs

This commit is contained in:
j 2016-05-21 15:19:25 +02:00
commit 7695a9c015
7 changed files with 60 additions and 238 deletions

View file

@ -7,7 +7,7 @@ import time
import unicodedata
from six.moves.urllib.parse import urlencode
from six import string_types
from six import text_type, string_types
from .. import find_re, strip_tags, decode_html
from .. import cache
@ -27,11 +27,11 @@ def get_url(id):
class Imdb(SiteParser):
'''
>>> Imdb('0068646')['title']
u'The Godfather'
>>> Imdb('0068646')['title'] == text_type(u'The Godfather')
True
>>> Imdb('0133093')['title']
u'The Matrix'
>>> Imdb('0133093')['title'] == text_type(u'The Matrix')
True
'''
regex = {
'alternativeTitles': {
@ -313,11 +313,11 @@ class Imdb(SiteParser):
return self._cache[url]
def __init__(self, id, timeout=-1):
#use akas.imdb.com to always get original title:
#http://www.imdb.com/help/show_leaf?titlelanguagedisplay
# use akas.imdb.com to always get original title:
# http://www.imdb.com/help/show_leaf?titlelanguagedisplay
self.baseUrl = "http://akas.imdb.com/title/tt%s/" % id
super(Imdb, self).__init__(timeout)
url = self.baseUrl + 'combined'
page = self.read_url(url, timeout=-1)
if '<title>IMDb: Page not found</title>' in page \
@ -640,25 +640,25 @@ def get_movie_by_title(title, timeout=-1):
If there is more than one film with that title for the year
Title (Year/I)
>>> get_movie_by_title(u'"Father Knows Best" (1954) {(#5.34)}')
u'1602860'
>>> str(get_movie_by_title(u'"Father Knows Best" (1954) {(#5.34)}'))
'1602860'
>>> get_movie_by_title(u'The Matrix (1999)')
u'0133093'
>>> str(get_movie_by_title(u'The Matrix (1999)'))
'0133093'
>>> get_movie_by_title(u'Little Egypt (1951)')
u'0043748'
>>> str(get_movie_by_title(u'Little Egypt (1951)'))
'0043748'
>>> str(get_movie_by_title(u'Little Egypt (1897/I)'))
'0214882'
>>> get_movie_by_title(u'Little Egypt (1897/I)')
u'0214882'
>>> get_movie_by_title(u'Little Egypt')
None
>>> get_movie_by_title(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
u'0866567'
>>> str(get_movie_by_title(u'"Dexter" (2006) {Father Knows Best (#1.9)}'))
'0866567'
'''
params = {'s':'tt','q': title}
params = {'s': 'tt', 'q': title}
if not isinstance(title, bytes):
try:
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
@ -676,20 +676,21 @@ def get_movie_by_title(title, timeout=-1):
def get_movie_id(title, director='', year='', timeout=-1):
'''
>>> get_movie_id('The Matrix')
u'0133093'
>>> str(get_movie_id('The Matrix'))
'0133093'
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
u'0060304'
>>> str(get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard'))
'0060304'
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
u'0060304'
>>> str(get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967'))
'0060304'
>>> get_movie_id(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
u'0179214'
>>> str(get_movie_id(u"Histoire(s) du cinema: Le controle de l'univers", u'Jean-Luc Godard'))
'0179214'
>>> str(get_movie_id(u"Histoire(s) du cinéma: Le contrôle de l'univers", u'Jean-Luc Godard'))
'0179214'
>>> get_movie_id(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
u'0179214'
'''
imdbId = {
(u'Le jour se l\xe8ve', u'Marcel Carn\xe9'): '0031514',
@ -772,9 +773,6 @@ def get_movie_poster(imdbId):
'''
>>> get_movie_poster('0133093')
'http://ia.media-imdb.com/images/M/MV5BMjEzNjg1NTg2NV5BMl5BanBnXkFtZTYwNjY3MzQ5._V1._SX338_SY475_.jpg'
>>> get_movie_poster('0994352')
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
'''
info = ImdbCombined(imdbId)
if 'posterId' in info:
@ -806,7 +804,7 @@ def max_votes():
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
data = cache.read_url(url)
votes = max([int(v.replace(',', ''))
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
return votes
def guess(title, director='', timeout=-1):