From 7243eefb8becf2825fd6f39415771e132978fc6f Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 13 Jan 2012 21:02:22 +0530 Subject: [PATCH] amg has to be digit --- ox/web/wikipedia.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ox/web/wikipedia.py b/ox/web/wikipedia.py index 0b9de17..d1d970f 100644 --- a/ox/web/wikipedia.py +++ b/ox/web/wikipedia.py @@ -70,6 +70,8 @@ def getMovieData(wikipediaUrl): value = value.split('
') filmbox[key.strip()] = value + if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit(): + del filmbox['amg_id'] if 'Allmovie movie' in data: filmbox['amg_id'] = findRe(data, 'Allmovie movie\|.*?(\d+)') @@ -85,15 +87,15 @@ def getMovieData(wikipediaUrl): if r: filmbox['archiveorg_id'] = r[0] - r = re.compile('{{mojo title\|(.*?)\|', re.IGNORECASE).findall(data) + r = re.compile('{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data) if r: filmbox['mojo_id'] = r[0].replace('id=', '') - r = re.compile('{{rotten-tomatoes\|(.*?)\|', re.IGNORECASE).findall(data) + r = re.compile('{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data) if r: filmbox['rottentomatoes_id'] = r[0].replace('id=', '') if 'google video' in data: - filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|') + filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)[\|}]') if 'DEFAULTSORT' in data: filmbox['title_sort'] = findRe(data, '''\{\{DEFAULTSORT:(.*?)\}\}''') return filmbox