From 03af5a3de07350a0ab116dee8f83f23c0a65f300 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 10 Jul 2009 10:47:01 +0200 Subject: [PATCH] fix imdb search and mapping --- oxweb/imdb.py | 27 ++++++++++++++++----------- oxweb/wikipedia.py | 16 +++++++++++----- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/oxweb/imdb.py b/oxweb/imdb.py index 295150a..08823ed 100644 --- a/oxweb/imdb.py +++ b/oxweb/imdb.py @@ -185,6 +185,21 @@ def getMovieInfo(imdbId): return info +def getMovieRuntimeSeconds(imdbId): + info = getMovieInfo(imdbId) + value = info['runtime'][0] + parsed_value = findRe(value, '(.*?) min') + parsed_value = findRe(parsed_value, '([0-9]+)') + if not parsed_value: + parsed_value = findRe(value, '(.*?) sec') + parsed_value = findRe(parsed_value, '([0-9]+)') + if not parsed_value: + parsed_value = 0 + else: + parsed_value = int(parsed_value) + else: + parsed_value = int(parsed_value) * 60 + return parsed_value def getMoviePoster(imdbId): info = getMovieInfo(imdbId) @@ -505,17 +520,7 @@ class IMDb: if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'): value = stripTags(value).strip() if key == 'runtime': - parsed_value = findRe(value, '(.*?) min') - parsed_value = findRe(parsed_value, '([0-9]+)') - if not parsed_value: - parsed_value = findRe(value, '(.*?) 
sec') - parsed_value = findRe(parsed_value, '([0-9]+)') - if not parsed_value: - parsed_value = 0 - else: - parsed_value = int(parsed_value) - else: - parsed_value = int(parsed_value) * 60 + parsed_value = getMovieRuntimeSeconds(self.imdb) elif key in ('country', 'language'): parsed_value = value.split(' / ') if len(parsed_value) == 1: diff --git a/oxweb/wikipedia.py b/oxweb/wikipedia.py index c79220b..8be87f8 100644 --- a/oxweb/wikipedia.py +++ b/oxweb/wikipedia.py @@ -15,7 +15,7 @@ def getMovieId(title, director='', year=''): return '' def getUrlByImdbId(imdbId): - query = '"imdb_id = %s"'% imdbId + query = '"%s"'% imdbId result = find(query) if result: url = result[0][1] @@ -45,16 +45,22 @@ def getWikiData(wikipediaUrl): def getMovieData(wikipediaUrl): data = getWikiData(wikipediaUrl) - filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\}\}''') + filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\n\}\}''') filmbox = {} - for row in filmbox_data.strip().split('|'): + for row in filmbox_data.strip().split('\n|'): d = row.split('=') if len(d) == 2: key = d[0].strip() + if key[0] == '|': + key = key[1:] value = d[1].strip() filmbox[key] = value - if 'amg_id' in filmbox: - filmbox['amg_id'] = filmbox['amg_id'].split('1:')[-1] + if 'imdb title' in data: + filmbox['imdb_id'] = findRe(data, 'imdb title\|(\d*?)\|') + if 'Amg movie' in data: + filmbox['amg_id'] = findRe(data, 'Amg movie\|(\d*?)\|') + if 'rotten-tomatoes' in data: + filmbox['rottentomatoes_id'] = findRe(data, 'rotten-tomatoes\|id\=(.*?)\|') return filmbox def getImageUrl(name):