# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
"""Look up films on the English Wikipedia and extract infobox data.

All network access goes through the ``oxlib.cache`` helpers ``getUrl`` and
``getUrlUnicode``; searching uses the MediaWiki ``api.php`` search endpoint.
"""
from urllib import urlencode

import simplejson
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, decodeHtml


def getMovieId(title, director='', year=''):
    """Return the Wikipedia URL of the best search hit for a film.

    Despite the name, the value returned is the article URL of the first
    search result, or '' when the search yields nothing.
    """
    query = '"%s" film %s %s' % (title, director, year)
    result = find(query, 1)
    if result:
        return result[0][1]
    return ''


def getUrlByImdbId(imdbId):
    """Return the URL of the first article mentioning ``imdbId``.

    If the id is not found and starts with '0', the search is repeated with
    one leading zero removed at a time. Returns '' when nothing is found,
    like the other lookup functions in this module.
    """
    result = find('"%s"' % imdbId)
    if result:
        return result[0][1]
    # Slice the string form: the id may have been passed as a non-string.
    imdb_id = str(imdbId)
    if imdb_id.startswith('0'):
        return getUrlByImdbId(imdb_id[1:])
    return ''


def getUrlByImdb(imdbId):
    """Deprecated alias, use getUrlByImdbId()."""
    return getUrlByImdbId(imdbId)


def getUrlByAllmovieId(allmovieId):
    """Return the URL of the first article containing the given amg_id.

    Returns '' when the search yields nothing.
    """
    result = find('"amg_id = 1:%s"' % allmovieId)
    if result:
        return result[0][1]
    return ''


def getWikiData(wikipediaUrl):
    """Fetch the raw wiki markup of the article at ``wikipediaUrl``."""
    title = wikipediaUrl.replace('http://en.wikipedia.org/wiki/', '')
    url = "http://en.wikipedia.org/w/index.php?title=%s&action=raw" % title
    return getUrlUnicode(url)


def _parseFilmbox(filmbox_data):
    """Turn the body of a film infobox into a dict of key -> raw value.

    Rows are separated by a newline followed by '|'. Each row is split on
    its first '=' only, so values that themselves contain '=' (URLs,
    template arguments) are kept instead of being dropped. Rows without
    '=' or with an empty key are ignored.
    """
    filmbox = {}
    for row in filmbox_data.strip().split('\n|'):
        parts = row.split('=', 1)
        if len(parts) != 2:
            continue
        key = parts[0].strip()
        if key.startswith('|'):
            # The first row still carries its pipe; strip again so that
            # "| name" yields "name" rather than " name".
            key = key[1:].strip()
        if not key:
            continue
        filmbox[key] = parts[1].strip()
    return filmbox


def getMovieData(wikipediaUrl):
    """Return infobox fields and external ids for a film article.

    The result holds the raw key/value pairs of the article's film infobox
    plus, when the corresponding markers occur in the markup, the keys
    'imdb_id', 'amg_id' and 'rottentomatoes_id'.
    """
    data = getWikiData(wikipediaUrl)
    filmbox_data = findRe(data, r'\{\{Infobox.Film(.*?)\n\}\}')
    filmbox = _parseFilmbox(filmbox_data)

    if 'imdb title' in data:
        filmbox['imdb_id'] = findRe(data, r'imdb title\|.*?(\d*?)\|')
    if 'Amg movie' in data:
        filmbox['amg_id'] = findRe(data, r'Amg movie\|.*?(\d*?)\|')
    if 'rotten-tomatoes' in data:
        rt_id = findRe(data, r'rotten-tomatoes\|id\=(.*?)\|')
        if not rt_id:
            # Fall back to the positional form of the template argument.
            rt_id = findRe(data, r'rotten-tomatoes\|(.*?)\|')
        filmbox['rottentomatoes_id'] = rt_id
    return filmbox


def getImageUrl(name):
    """Return the upload.wikimedia.org URL found on the image page ``name``."""
    data = getUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
    return findRe(data, '="(http://upload.wikimedia.org/.*?)"')


def getPosterUrl(wikipediaUrl):
    """Return the URL of the infobox image of a film article, or ''."""
    data = getMovieData(wikipediaUrl)
    if 'image' in data:
        return getImageUrl(data['image'])
    return ''


def getMoviePoster(wikipediaUrl):
    """Deprecated alias, use getPosterUrl()."""
    return getPosterUrl(wikipediaUrl)


def getAllmovieId(wikipediaUrl):
    """Return the amg_id recorded in a film article, or ''."""
    data = getMovieData(wikipediaUrl)
    return data.get('amg_id', '')


def find(query, max_results=10):
    """Full-text search the English Wikipedia.

    Returns a list of ``(title, url, '')`` tuples, at most ``max_results``
    long as requested through ``srlimit``. Returns an empty list when the
    API gives no usable response.
    """
    # Encode only text: calling .encode() on a non-ASCII byte string makes
    # Python 2 attempt an implicit ASCII decode first and fail.
    if not isinstance(query, bytes):
        query = query.encode('utf-8')
    params = {'action': 'query',
              'list': 'search',
              'format': 'json',
              'srlimit': max_results,
              'srwhat': 'text',
              'srsearch': query}
    url = "http://en.wikipedia.org/w/api.php?" + urlencode(params)
    data = getUrl(url)
    if not data:
        # Retry with timeout=0; presumably this bypasses a stale cached
        # empty response - TODO confirm against oxlib.cache.
        data = getUrl(url, timeout=0)
    if not data:
        return []
    result = simplejson.loads(data)
    results = []
    if result and 'query' in result:
        for hit in result['query']['search']:
            title = hit['title']
            url = "http://en.wikipedia.org/wiki/%s" % title.replace(' ', '_')
            results.append((title, url, ''))
    return results