From da38ba3839b378ee2e386dafd3aa518a96ad3f04 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 21 Feb 2019 17:43:05 +0530 Subject: [PATCH] imdb: parse technical info --- ox/web/imdb.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 8e011cc..e52cfc5 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -90,6 +90,20 @@ def parse_aspectratio(value): value = '.'.join(value.strip().split('.')[:2]) return value + +def technical(label): + return { + 'page': 'technical', + 're': [ + '\s*?%s\s*?.*?\s*?(.*?)\s*?' % label, + lambda data: [ + re.sub('\s+', ' ', d.strip()) for d in data.strip().split('
') + ] if data else [] + ], + 'type': 'list' + } + + ''' 'posterIds': { 'page': 'posters', @@ -300,6 +314,11 @@ class Imdb(SiteParser): ], 'type': 'list' }, + 'laboratory': technical('Laboratory'), + 'camera': technical('Camera'), + 'negative format': technical('Negative Format'), + 'cinematographic process': technical('Cinematographic Process'), + 'printed film format': technical('Printed Film Format'), } def read_url(self, url, timeout):