([\d\.]+)', 'type': 'float', }, 'budget': { 'page': 'business', 're': [ '

Budget

\s*?\$(.*?).*?>(.*?).*?(.*?)', lambda ll: [strip_tags(l) for l in ll] ], 'type': 'list' }, 'cinematographer': { 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Cinematography by(.*?)', '(.*?)' ], 'type': 'list' }, 'connections': { 'page': 'trivia?tab=mc', 're': '

(.*?)

(.*?)(<\/div>\n

Country:

.*?

', #'(.*?)', #links changed to work with existing caches, just take all links '(.*?)', ], 'type': 'list' }, 'creator': { 'page': 'combined', 're': [ '

Creator.?:

.*?

(.*?)

', '(.*?)' ], 'type': 'list' }, '_director': { 'page': 'combined', 're': [ '

Director:

.*?

(.*?)

', '(.*?)' ], 'type': 'list' }, 'composer': { 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Original Music by(.*?)', '.*?(.*?)', 'type': 'string' }, 'filmingLocations': { 'page': 'locations', 're': [ '(.*?)', lambda data: data.strip(), ], 'type': 'list' }, 'genre': { 'page': 'combined', 're': [ '

Genre:

(.*?)(.*?)' ], 'type': 'list' }, 'gross': { 'page': 'business', 're': [ '

Gross

\s*?\$(.*?)

Language:

.*?

', #'(.*?)', #links changed to work with existing caches, just take all links '(.*?)', ], 'type': 'list' }, 'summary': { 'page': 'plotsummary', 're': '

(.*?)<\/p>', 'type': 'string' }, 'posterId': { 'page': 'combined', 're': '/primary-photo/media/rm(.*?)/tt', 'type': 'string' }, 'posterIds': { 'page': 'posters', 're': '/unknown-thumbnail/media/rm(.*?)/tt', 'type': 'list' }, 'producer': { 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Produced by(.*?)', '(.*?)' ], 'type': 'list' }, 'productionCompany': { 'page': 'combined', 're': [ 'Production Companies

(.*?)', '(.*?)' ], 'type': 'list' }, 'rating': { 'page': 'combined', 're': '

.*?([\d,.]+?)/10', 'type': 'float' }, 'releasedate': { 'page': 'releaseinfo', 're': [ '(.*?)', ox.strip_tags, ], 'type': 'list' }, 'reviews': { 'page': 'externalreviews', 're': [ '

(.*?)', '

(.*?)

' ], 'type': 'list' }, 'runtime': { 'page': 'combined', 're': '

Runtime:

.*?([0-9]+ sec|[0-9]+ min).*?

', 'type': 'string' }, 'color': { 'page': 'combined', 're': [ '

Color:

(.*?)

', '(.*?)' ], 'type': 'list' }, 'sound': { 'page': 'combined', 're': [ '

Sound Mix:

(.*?)

', '(.*?)' ], 'type': 'list' }, 'season': { 'page': 'combined', 're': [ '

Original Air Date:

.*?

(.*?)

', '$Season (\d+), Episode \d+$', ], 'type': 'int' }, 'episode': { 'page': 'combined', 're': [ '

Original Air Date:

.*?

(.*?)

', '$Season \d+, Episode (\d+)$', ], 'type': 'int' }, 'series': { 'page': 'combined', 're': '

TV Series:

.*?(TV series|TV mini-series) ', 'type': 'string' }, 'title': { 'page': 'combined', 're': '

title?num_votes=500000,&sort=num_votes,desc' data = ox.cache.read_url(url) votes = max([int(v.replace(',', '')) for v in re.compile('([\d,]+)').findall(data)]) return votes def guess(title, director='', timeout=-1): return get_movie_id(title, director, timeout=timeout) if name == "main": import json print json.dumps(Imdb('0306414'), indent=2) #print json.dumps(Imdb('0133093'), indent=2)