From f3787c741224fd58d6de8efdf52d05762d7354e2 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Fri, 13 Jan 2012 19:37:12 +0530
Subject: [PATCH] not all archive.org links have a title

---
 ox/web/wikipedia.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ox/web/wikipedia.py b/ox/web/wikipedia.py
index e339e17..849c5a1 100644
--- a/ox/web/wikipedia.py
+++ b/ox/web/wikipedia.py
@@ -51,7 +51,8 @@ def getWikiData(wikipediaUrl):
     return data
 
 def getMovieData(wikipediaUrl):
-    if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
+    if not wikipediaUrl.startswith('http'):
+        wikipediaUrl = getUrl(wikipediaUrl)
     data = getWikiData(wikipediaUrl)
     filmbox_data = findRe(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
     filmbox = {}
@@ -80,7 +81,7 @@ def getMovieData(wikipediaUrl):
         if r:
             filmbox['imdb_id'] = r[0]
 
-    r = re.compile('{{Internet Archive.*?\|id=(.*?)\|', re.IGNORECASE).findall(data)
+    r = re.compile('{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
     if r:
         filmbox['archiveorg_id'] = r[0]
 