diff --git a/ox/web/imdb.py b/ox/web/imdb.py index a38d479..0bf1f2d 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -15,6 +15,15 @@ import google class Imdb(SiteParser): regex = { + 'alternative_titles': { + 'page': 'releaseinfo', + 're': [ + 'name="akas".*?
(.*?)', + 're': '.*?
(.*?)', 'type': 'string' }, 'poster_id': { @@ -145,7 +154,7 @@ class Imdb(SiteParser): }, 'year': { 'page': 'combined', - 're': '').findall(data) self['connections'] = cc + for key in ('countries', 'genres'): + self[key] = filter(lambda x: x.lower() != 'home', self[key]) + + def guess(title, director='', timeout=google.DEFAULT_TIMEOUT): #FIXME: proper file -> title title = title.split('-')[0] diff --git a/ox/web/siteparser.py b/ox/web/siteparser.py index ce5ee33..d9fa1dd 100644 --- a/ox/web/siteparser.py +++ b/ox/web/siteparser.py @@ -49,13 +49,13 @@ class SiteParser(dict): data = [f(d) for d in data] else: data = f(data) - return data + return data if self.regex[key]['type'] == 'float': data = apply_f(float, data) elif self.regex[key]['type'] == 'int': data = apply_f(int, data) elif self.regex[key]['type'] == 'date': - parse_date = lambda d: datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d') + parse_date = lambda d: d and datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d') data = apply_f(parse_date, data) self[key] = data