API change: rename htmldecode -> decodeHtml

This commit is contained in:
j 2008-05-07 11:47:53 +02:00
parent 8e8f8f3896
commit be45b75845

View File

@ -12,7 +12,7 @@ import time
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
import chardet import chardet
import oxutils import oxutils
from oxutils import stripTags, htmldecode, findRe, findString from oxutils import stripTags, decodeHtml, findRe, findString
from oxutils.cache import getUrl, getUrlUnicode from oxutils.cache import getUrl, getUrlUnicode
from oxutils.normalize import normalizeTitle, normalizeImdbId from oxutils.normalize import normalizeTitle, normalizeImdbId
@ -63,7 +63,7 @@ def getMovieInfo(imdbId):
title = stripTags(i[0]).strip().lower() title = stripTags(i[0]).strip().lower()
txt= stripTags(i[1]).strip() txt= stripTags(i[1]).strip()
def cleanUp(k): def cleanUp(k):
k = htmldecode(k).replace(u'\xa0', ' ').strip() k = decodeHtml(k).replace(u'\xa0', ' ').strip()
if k.endswith('more'): k=k[:-len('more')].strip() if k.endswith('more'): k=k[:-len('more')].strip()
return k return k
txt = cleanUp(txt) txt = cleanUp(txt)
@ -91,7 +91,7 @@ def getMovieInfo(imdbId):
if html_title: if html_title:
html_title = str(html_title[0]) html_title = str(html_title[0])
html_title = html_title.replace('<br />', ' ').replace(' ', ' ') html_title = html_title.replace('<br />', ' ').replace(' ', ' ')
title = htmldecode(html_title) title = decodeHtml(html_title)
title = stripTags(title) title = stripTags(title)
year = findRe(title, '\((\d{4})\)') year = findRe(title, '\((\d{4})\)')
if not year: if not year:
@ -210,7 +210,7 @@ def getMovieLocations(imdbId):
soup = BeautifulSoup(data) soup = BeautifulSoup(data)
locations = [] locations = []
for key in soup('a', {'href': re.compile('^/List')}): for key in soup('a', {'href': re.compile('^/List')}):
locations.append(htmldecode(key.string)) locations.append(decodeHtml(key.string))
return locations return locations
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')): def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
@ -270,7 +270,7 @@ def getMovieKeywords(imdbId):
soup = BeautifulSoup(data) soup = BeautifulSoup(data)
keywords = [] keywords = []
for key in soup('a', {'href': re.compile('^/keyword/')}): for key in soup('a', {'href': re.compile('^/keyword/')}):
k = htmldecode(key.string) k = decodeHtml(key.string)
k = k.replace(u'\xa0', ' ') k = k.replace(u'\xa0', ' ')
keywords.append(k) keywords.append(k)
return keywords return keywords
@ -509,7 +509,7 @@ class IMDb:
episodes[episode]['title'] = match[3].strip() episodes[episode]['title'] = match[3].strip()
if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])): if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])):
episodes[episode]['title'] = u'' episodes[episode]['title'] = u''
description = htmldecode(match[5]) description = decodeHtml(match[5])
description = stripTags(description.split('Next US airings:')[0]) description = stripTags(description.split('Next US airings:')[0])
episodes[episode]['description'] = description episodes[episode]['description'] = description
episodes[episode]['date'] = '' episodes[episode]['date'] = ''