From 8cf1f86184777df66582b85f5a47fcffe81b74b2 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 28 Jun 2007 11:00:12 +0000 Subject: [PATCH] title refinement --- scrapeit/imdb.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index 628eed7..cb202f1 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -159,9 +159,9 @@ class IMDb: html_title = str(html_title[0]) title = stripTags(html_title) title = re.sub('\(\d\d\d\d\)', '', title) - title = re.sub('\(TV\)', '', title) - title = re.sub('\(V\)', '', title) - title = re.sub('\(VG\)', '', title) + title = re.sub('\(\d\d\d\d\/I\)', '', title) + for t in ('TV-Series', '(mini)', '(VG)', '(V)', '(TV)'): + title = title.replace(t, '') return title.strip() def parseYear(self): @@ -174,6 +174,8 @@ class IMDb: if html_title: html_title = str(html_title[0]) year = re.compile('(\d\d\d\d)').findall(html_title) + if not year: + year = re.compile('(\d\d\d\d/I)').findall(html_title) if year: year = year[0] else: year = ''