diff --git a/oxweb/imdb.py b/oxweb/imdb.py
index a642421..dcc30e0 100644
--- a/oxweb/imdb.py
+++ b/oxweb/imdb.py
@@ -22,6 +22,17 @@ import google
def getUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
return oxlib.cache.getUrlUnicode(url, data, headers, timeout)
+'''
+Usage example: check whether a fetched result is valid while updating (here the header status must be 200):
+def validate(result, header):
+ return header['status'] == u'200'
+
+try:
+ d = oxlib.cache.getUrlUnicode(url, data, headers, timeout=0, valid=validate)
+except oxlib.cache.InvalidResult, e:
+ print e.headers
+
+'''
def getMovieId(title, director='', year=''):
'''
>>> getMovieId('The Matrix')
@@ -43,7 +54,7 @@ def getMovieData(imdbId):
# internal functions below
def getUrlBase(imdbId):
- return "http://www.imdb.com/title/tt%s" % imdbId
+ return "http://www.imdb.com/title/tt%s/" % imdbId
def getRawMovieData(imdbId):
imdbId = normalizeImdbId(imdbId)
@@ -226,7 +237,7 @@ def creditList(data, section=None):
def getMovieCredits(imdbId):
credits = dict()
- url = "%s/fullcredits" % getUrlBase(imdbId)
+ url = "%sfullcredits" % getUrlBase(imdbId)
data = getUrlUnicode(url)
groups = data.split('
(.*?)\n\n''', re.DOTALL).findall(data):
@@ -342,7 +353,7 @@ def getMovieConnections(imdbId):
return connections
def getMovieKeywords(imdbId):
- url = "%s/keywords" % getUrlBase(imdbId)
+ url = "%skeywords" % getUrlBase(imdbId)
data = getUrlUnicode(url)
keywords = []
for keyword in re.compile('''(.*?)''').findall(data):
@@ -352,7 +363,7 @@ def getMovieKeywords(imdbId):
return keywords
def getMovieExternalReviews(imdbId):
- url = "%s/externalreviews" % getUrlBase(imdbId)
+ url = "%sexternalreviews" % getUrlBase(imdbId)
data = getUrlUnicode(url)
soup = BeautifulSoup(data)
ol = soup('ol')
@@ -403,7 +414,7 @@ def _parseDate(d):
return d
def getMovieReleaseDates(imdbId):
- url = "%s/releaseinfo" % getUrlBase(imdbId)
+ url = "%sreleaseinfo" % getUrlBase(imdbId)
data = getUrlUnicode(url)
releasedates = []
regexp = '''(.*?) | .*?(.*?) | .*?(.*?) |
'''
@@ -441,7 +452,7 @@ def getMovieFlimingDates(imdbId):
return ''
def getMovieBusiness(imdbId):
- url = "%s/business" % getUrlBase(imdbId)
+ url = "%sbusiness" % getUrlBase(imdbId)
data = getUrlUnicode(url)
business = {}
for r in re.compile('''(.*?)
(.*?)
.
''', re.DOTALL).findall(data):
@@ -451,7 +462,7 @@ def getMovieBusiness(imdbId):
return business
def getMovieEpisodes(imdbId):
- url = "%s/episodes" % getUrlBase(imdbId)
+ url = "%sepisodes" % getUrlBase(imdbId)
data = getUrlUnicode(url)
episodes = {}
regexp = r'''Season (.*?), Episode (.*?): (.*?)
(.*?)
(.*?)
'''
@@ -485,7 +496,7 @@ def getMovieEpisodes(imdbId):
class IMDb:
def __init__(self, imdbId):
self.imdb = imdbId
- self.pageUrl = "http://www.imdb.com/title/tt%s/" % self.imdb
+ self.pageUrl = getUrlBase(imdbId)
def getPage(self):
return getUrlUnicode(self.pageUrl)