From e2e012c8f1251ca33add34eeb90c99f54c34f2a8 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 16 Jan 2018 09:48:12 +0100 Subject: [PATCH] tune IMDb parser --- ox/web/imdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 9762bad..a0bee77 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -43,7 +43,7 @@ def zebra_list(label, more=None): conditions = { 'page': 'reference', 're': [ - label + '.*?', + '_label">' + label + '.*?', '(.*?)' ], 'type': 'list', @@ -289,7 +289,7 @@ class Imdb(SiteParser): isinstance(self['alternativeTitles'][0], string_types): self['alternativeTitles'] = [self['alternativeTitles']] - for key in ('country', 'genre', 'language', 'sound'): + for key in ('country', 'genre', 'language', 'sound', 'color'): if key in self: self[key] = [x[0] if len(x) == 1 and isinstance(x, list) else x for x in self[key]] self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))