# -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 from __future__ import print_function import re import time import unicodedata from six.moves.urllib.parse import urlencode from six import text_type, string_types from .. import find_re, strip_tags, decode_html from .. import cache from . siteparser import SiteParser from . import duckduckgo from ..utils import datetime from ..geo import normalize_country_name def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False): headers = headers.copy() # https://webapps.stackexchange.com/questions/11003/how-can-i-disable-reconfigure-imdbs-automatic-geo-location-so-it-does-not-defau headers['X-Forwarded-For'] = '72.21.206.80' return cache.read_url(url, data, headers, timeout, unicode=unicode) def get_url(id): return "http://www.imdb.com/title/tt%s/" % id class Imdb(SiteParser): ''' >>> Imdb('0068646')['title'] == text_type(u'The Godfather') True >>> Imdb('0133093')['title'] == text_type(u'The Matrix') True ''' regex = { 'alternativeTitles': { 'page': 'releaseinfo', 're': [ '
(.*?)<\/p>',
'type': 'string'
},
'posterId': {
'page': 'combined',
're': '(.*?)
',
'(.*?)'
],
'type': 'list'
},
'rating': {
'page': 'combined',
're': '