From 80641b54615695942ad3da4ba5b94962e62e17b5 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 30 Nov 2009 01:46:56 +0100
Subject: [PATCH] imdb cleanups
---
oxweb/imdb.py | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/oxweb/imdb.py b/oxweb/imdb.py
index 2f7225a..3543714 100644
--- a/oxweb/imdb.py
+++ b/oxweb/imdb.py
@@ -75,8 +75,8 @@ def getRawMovieData(imdbId):
data['release date'] = getMovieReleaseDate(imdbId)
return data
-def getMovieInfo(imdbId):
- data = readUrlUnicode(getUrlBase(imdbId))
+def getMovieInfo(imdbId, timeout=-1):
+ data = readUrlUnicode(getUrlBase(imdbId), timeout=timeout)
info = dict()
info['poster'] = findRe(data, 'name="poster".*?(.*?):(.*?)
')[0]
+ else:
+ txt= i[1]
+ txt = stripTags(txt).strip()
+
def cleanUp(k):
k = decodeHtml(k).replace(u'\xa0', ' ').strip()
if k.endswith('more'): k=k[:-len('more')].strip()
@@ -258,7 +263,7 @@ def creditList(data, section=None):
credits_ = re.compile('''
.*?(.*?) | (.*?) |
''').findall(data)
credits = []
for c_ in credits_:
- c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
+ c = [stripTags(decodeHtml(c_[0]).strip()), stripTags(decodeHtml(c_[1]).strip())]
if section=='writers':
c[1] = c[1].replace('
', '').strip().replace(')', '').replace('(','')
if c[1].endswith(' and'): c[1] = c[1][:-4]
@@ -328,6 +333,7 @@ def getMovieLocations(imdbId):
url = "%slocations" % getUrlBase(imdbId)
data = readUrlUnicode(url)
locations = re.compile('
'):
t = t[:-8]
+ if t.endswith('
\n
'):
+ t = t[:-len('
\n
')]
return t.strip()
trivia = [clean(t) for t in trivia]
return trivia
@@ -454,7 +462,9 @@ def getMovieBusinessSum(imdbId):
b_['budget'] = int(intValue(budget[0].replace(',', '')))
if 'gross' in business:
- b_['gross'] = int(intValue(business['gross'][0].replace(',', '')))
+ gross = filter(lambda x: x.startswith('$'), business['gross'])
+ if gross:
+ b_['gross'] = int(intValue(gross[0].replace(',', '')))
#b_['gross'] = sum([int(intValue(i.replace(',', ''))) for i in business['gross']])
#if 'weekend gross' in business:
# b_['gross'] += sum([int(intValue(i.replace(',', ''))) for i in business['weekend gross']])