Only match quotes, not Amazon ads

This commit is contained in:
j 2008-04-30 00:24:25 +02:00
parent 8b58b4824b
commit 567e7e702d

View file

@@ -12,7 +12,7 @@ import time
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
import chardet import chardet
import oxutils import oxutils
from oxutils import stripTags, htmldecode, findRegexp from oxutils import stripTags, htmldecode, findRegexp, findString
from oxutils.cache import getUrl, getUrlUnicode from oxutils.cache import getUrl, getUrlUnicode
from oxutils.normalize import normalizeTitle, normalizeImdbId from oxutils.normalize import normalizeTitle, normalizeImdbId
@@ -149,7 +149,7 @@ def getMovieTrailers(imdbId):
def getMovieQuotes(imdbId): def getMovieQuotes(imdbId):
url = "%s/quotes" % getUrlBase(imdbId) url = "%s/quotes" % getUrlBase(imdbId)
data = getUrlUnicode(url) data = getUrlUnicode(url)
quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(data) quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(findString(data, '<a name="q'))
quotes = [(q[0].strip(),q[1].strip()) for q in quotes] quotes = [(q[0].strip(),q[1].strip()) for q in quotes]
return quotes return quotes