Only match quotes, not Amazon ads

This commit is contained in:
j 2008-04-30 00:24:25 +02:00
parent 8b58b4824b
commit 567e7e702d

View file

@ -12,7 +12,7 @@ import time
from BeautifulSoup import BeautifulSoup
import chardet
import oxutils
from oxutils import stripTags, htmldecode, findRegexp
from oxutils import stripTags, htmldecode, findRegexp, findString
from oxutils.cache import getUrl, getUrlUnicode
from oxutils.normalize import normalizeTitle, normalizeImdbId
@ -149,7 +149,7 @@ def getMovieTrailers(imdbId):
def getMovieQuotes(imdbId):
    """Fetch the quotes page for imdbId and return the quotes found on it.

    Returns a list of (label, text) tuples with surrounding whitespace
    stripped. ``label`` is the bold text in front of the colon (presumably
    the character name -- confirm against the page markup) and ``text`` is
    everything between that colon and the next ``<br>``.
    """
    url = "%s/quotes" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    # Restrict matching to the slice selected by the '<a name="q' marker so
    # that "<b>..</b>:" markup outside the quotes (e.g. Amazon ads) is not
    # returned as a quote.
    # NOTE(review): assumes findString(data, marker) returns the text that
    # follows the first marker, and '' when it is absent -- confirm in oxutils.
    quotesHtml = findString(data, '<a name="q')
    quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(quotesHtml)
    return [(q[0].strip(), q[1].strip()) for q in quotes]