diff --git a/oxweb/imdb.py b/oxweb/imdb.py index 2f7225a..3543714 100644 --- a/oxweb/imdb.py +++ b/oxweb/imdb.py @@ -75,8 +75,8 @@ def getRawMovieData(imdbId): data['release date'] = getMovieReleaseDate(imdbId) return data -def getMovieInfo(imdbId): - data = readUrlUnicode(getUrlBase(imdbId)) +def getMovieInfo(imdbId, timeout=-1): + data = readUrlUnicode(getUrlBase(imdbId), timeout=timeout) info = dict() info['poster'] = findRe(data, 'name="poster".*?(.*?):(.*?)
')[0] + else: + txt= i[1] + txt = stripTags(txt).strip() + def cleanUp(k): k = decodeHtml(k).replace(u'\xa0', ' ').strip() if k.endswith('more'): k=k[:-len('more')].strip() @@ -258,7 +263,7 @@ def creditList(data, section=None): credits_ = re.compile('''.*?(.*?)(.*?)''').findall(data) credits = [] for c_ in credits_: - c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()] + c = [stripTags(decodeHtml(c_[0]).strip()), stripTags(decodeHtml(c_[1]).strip())] if section=='writers': c[1] = c[1].replace('
', '').strip().replace(')', '').replace('(','') if c[1].endswith(' and'): c[1] = c[1][:-4] @@ -328,6 +333,7 @@ def getMovieLocations(imdbId): url = "%slocations" % getUrlBase(imdbId) data = readUrlUnicode(url) locations = re.compile('

'): t = t[:-8] + if t.endswith('
\n
'): + t = t[:-len('
\n
')] return t.strip() trivia = [clean(t) for t in trivia] return trivia @@ -454,7 +462,9 @@ def getMovieBusinessSum(imdbId): b_['budget'] = int(intValue(budget[0].replace(',', ''))) if 'gross' in business: - b_['gross'] = int(intValue(business['gross'][0].replace(',', ''))) + gross = filter(lambda x: x.startswith('$'), business['gross']) + if gross: + b_['gross'] = int(intValue(gross[0].replace(',', ''))) #b_['gross'] = sum([int(intValue(i.replace(',', ''))) for i in business['gross']]) #if 'weekend gross' in business: # b_['gross'] += sum([int(intValue(i.replace(',', ''))) for i in business['weekend gross']])