never update imdb cache for now

This commit is contained in:
j 2009-05-31 22:28:01 +02:00
parent 9fab130d81
commit 10d3783673
1 changed files with 7 additions and 2 deletions

View File

@ -10,12 +10,17 @@ from BeautifulSoup import BeautifulSoup
import chardet import chardet
import oxlib import oxlib
from oxlib import stripTags, decodeHtml, findRe, findString from oxlib import stripTags, decodeHtml, findRe, findString
from oxlib.cache import getUrl, getUrlUnicode import oxlib.cache
from oxlib.normalize import normalizeTitle, normalizeImdbId from oxlib.normalize import normalizeTitle, normalizeImdbId
from oxlib import * from oxlib import *
import google import google
'''
Never time out IMDb data; to update the cache, remove the data from the cache folder.
'''
def getUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
    '''
    Module-local wrapper around oxlib.cache.getUrlUnicode.

    Forwards url, data, headers and timeout positionally and returns
    whatever the cache layer returns. The only difference from calling
    oxlib.cache.getUrlUnicode directly is the default timeout of -1,
    which is intended to keep cached IMDb pages from ever expiring
    (NOTE(review): confirm that -1 means "never expire" in oxlib.cache).
    '''
    cachedFetch = oxlib.cache.getUrlUnicode
    return cachedFetch(url, data, headers, timeout)
def getMovieId(title, director='', year=''): def getMovieId(title, director='', year=''):
''' '''
@ -330,7 +335,7 @@ def getMovieTrivia(imdbId):
def getMovieConnections(imdbId): def getMovieConnections(imdbId):
url = "%s/movieconnections" % getUrlBase(imdbId) url = "%s/movieconnections" % getUrlBase(imdbId)
data = getUrl(url) data = getUrlUnicode(url)
connections={} connections={}
for c in re.compile('''<h5>(.*?)</h5>(.*?)\n\n''', re.DOTALL).findall(data): for c in re.compile('''<h5>(.*?)</h5>(.*?)\n\n''', re.DOTALL).findall(data):
connections[unicode(c[0])] = re.compile('''<a href="/title/tt(\d{7})/">''').findall(c[1]) connections[unicode(c[0])] = re.compile('''<a href="/title/tt(\d{7})/">''').findall(c[1])