From 9333abbd1b52a19b1c80d9ce62edf0528c2211af Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Tue, 9 Aug 2011 10:30:13 +0200 Subject: [PATCH] business as usual --- ox/web/imdb.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 866befd..ecba5c7 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -41,6 +41,14 @@ class Imdb(SiteParser): 'type': 'list' }, + 'budget': { + 'page': 'business', + 're': [ + '
Budget
(.*?)(.*?)', 'type': 'list' }, + 'gross': { + 'page': 'business', + 're': [ + '
Gross
(.*?)\(', + lambda data: findRe(data.replace(',', ''), '\d+') + ], + 'type': 'int' + }, 'keywords': { 'page': 'keywords', 're': '(.*?)', @@ -210,6 +226,14 @@ class Imdb(SiteParser): 're': '([\d,]*?) votes', 'type': 'string' }, + 'worldwide_gross': { + 'page': 'business', + 're': [ + '\$([\d,]+?) \(Worldwide\)', + lambda data: int(data.replace(',', '')), + ], + 'type': 'list' + }, 'writers': { 'page': 'combined', 're': [ @@ -305,6 +329,15 @@ class Imdb(SiteParser): if key in self: del self[key] + if 'worldwide_gross' in self: + if not 'gross' in self: + self['gross'] = 0 + self['gross'] += max(self['worldwide_gross']) + del self['worldwide_gross'] + + if 'budget' in self and 'gross' in self: + self['profit'] = self['gross'] - self['budget'] + class ImdbCombined(Imdb): def __init__(self, id, timeout=-1): _regex = {}