diff --git a/oxweb/wikipedia.py b/oxweb/wikipedia.py index 905abed..8c74efe 100644 --- a/oxweb/wikipedia.py +++ b/oxweb/wikipedia.py @@ -37,8 +37,9 @@ def getUrlByAllmovieId(allmovieId): return '' def getWikiData(wikipediaUrl): - title = wikipediaUrl.replace('http://en.wikipedia.org/wiki/', '') - url = "http://en.wikipedia.org/w/index.php?title=%s&action=raw" % title + url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=') + url = "%s&action=raw" % url + print url data = getUrlUnicode(url) return data @@ -46,7 +47,10 @@ def getMovieData(wikipediaUrl): data = getWikiData(wikipediaUrl) filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\n\}\}''') filmbox = {} - for row in filmbox_data.strip().split('\n|'): + _box = filmbox_data.strip().split('\n|') + if len(_box) == 1: + _box = _box[0].split('|\n') + for row in _box: d = row.split('=') if len(d) == 2: key = d[0].strip() @@ -56,7 +60,7 @@ def getMovieData(wikipediaUrl): filmbox[key] = value if 'imdb title' in data: filmbox['imdb_id'] = findRe(data, 'imdb title\|.*?(\d*?)\|') - else if 'imdb episode' in data: + elif 'imdb episode' in data: filmbox['imdb_id'] = findRe(data, 'imdb episode\|.*?(\d*?)\|') if 'Amg movie' in data: filmbox['amg_id'] = findRe(data, 'Amg movie\|.*?(\d*?)\|') @@ -64,6 +68,8 @@ def getMovieData(wikipediaUrl): filmbox['rottentomatoes_id'] = findRe(data, 'rotten-tomatoes\|id\=(.*?)\|') if not filmbox['rottentomatoes_id']: filmbox['rottentomatoes_id'] = findRe(data, 'rotten-tomatoes\|(.*?)\|') + if 'google video' in data: + filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|') if 'DEFAULTSORT' in data: filmbox['title_sort'] = findRe(data, '''\{\{DEFAULTSORT:(.*?)\}\}''') return filmbox