From 0f738495f70f785785118a34bce3f44a6b76931d Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 12 Apr 2007 11:34:57 +0000 Subject: [PATCH] title_english --- scrapeit/imdb.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index 3964333..db063ae 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -132,6 +132,16 @@ class IMDb: parsed_value = m[0][0] else: parsed_value = '' + elif key == 'also_known_as': + parsed_value = '' + m = re.compile('(.*) \(International: English title').findall(value) + if m: + parsed_value = m[0] + else: + m = re.compile('(.*) \(USA').findall(value) + if m: + parsed_value = m[0] + parsed_value = parsed_value.split('
')[-1].split('(')[0].strip() else: print value parsed_value = value @@ -178,7 +188,7 @@ class IMDb: data = data.replace('\n',' ') #some values - keys = ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline', 'tv_series') + keys = ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline', 'tv_series', 'also_known_as') for key in keys: IMDbDict[key] = '' IMDbDict['runtime'] = 0 @@ -191,6 +201,7 @@ class IMDb: if key in keys: IMDbDict[key] = self.parse_raw_value(key, raw_value) + IMDbDict['title_english'] = IMDbDict.pop('also_known_as', IMDbDict['title']) #is episode IMDbDict['episode_of'] = IMDbDict.pop('tv_series', '')