From c941b9877d81f56887f57226fe981ad9bb5227ca Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 28 Jun 2007 11:14:23 +0000 Subject: [PATCH] parse year not any 4 digit number in title --- scrapeit/imdb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index 84fd762..57d926a 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -176,9 +176,10 @@ class IMDb: html_title = soup('title') if html_title: html_title = str(html_title[0]) - year = re.compile('(\d\d\d\d)').findall(html_title) + html_title = stripTags(html_title) + year = re.compile('\((\d\d\d\d)\)').findall(html_title) if not year: - year = re.compile('(\d\d\d\d/I)').findall(html_title) + year = re.compile('\((\d\d\d\d/I)\)').findall(html_title) if year: year = year[0] else: year = ''