diff --git a/oxweb/dailymotion.py b/oxweb/dailymotion.py
index 7f9f3cb..c3bc2e7 100644
--- a/oxweb/dailymotion.py
+++ b/oxweb/dailymotion.py
@@ -7,11 +7,11 @@ from oxlib.cache import getUrl
def getVideoUrl(url):
'''
- >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
- 'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+ >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?key')[0]
+ 'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv'
- >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
- 'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+ >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?key')[0]
+ 'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv'
'''
data = getUrl(url)
video = re.compile('''video", "(.*?)"''').findall(data)
diff --git a/oxweb/imdb.py b/oxweb/imdb.py
index 66fc503..08a315b 100644
--- a/oxweb/imdb.py
+++ b/oxweb/imdb.py
@@ -74,14 +74,16 @@ def getMovieInfo(imdbId):
if k.endswith('more'): k=k[:-len('more')].strip()
return k
txt = cleanUp(txt)
- if title not in ('plot', 'trivia', 'filming locations', 'mpaa', 'tagline'):
+ if title not in ('plot', 'trivia', 'filming locations', 'mpaa', 'tagline', 'original air date'):
if '|' in txt:
txt = [cleanUp(k) for k in txt.split('|')]
elif ', ' in txt:
txt = [cleanUp(k) for k in txt.split(', ')]
+ if title == 'original air date':
+ txt = txt.split('\n')[0].strip()
if not title.startswith('moviemeter'):
info[title] = txt
- for key in ('user comments', 'writers (wga)'):
+ for key in ('user comments', 'writers (wga)', 'plot keywords'):
if key in info:
del info[key]
if 'release date' in info:
@@ -115,8 +117,30 @@ def getMovieInfo(imdbId):
title = title[:title.find(u'\xa0')].strip()
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
- info['title'] = title
+ info['title'] = normalizeTitle(title)
info['year'] = year
+ if title.startswith('"') and title.find('"',1) > 0 and \
+ title.find('"',1) == title.rfind('"'):
+ episode_title = title[title.rfind('"')+1:]
+ episode_title = re.sub(r"\?{4}", "", episode_title).strip()  # drop a literal '????' run
+ episode_title = re.sub(r"\d{4}", "", episode_title).strip()  # drop any 4-digit run (presumably the year)
+ if episode_title == '-': episode_title=''
+ title = normalizeTitle(title[1:title.rfind('"')])  # text between the quotes is the series name
+ if episode_title:
+ info['episode title'] = episode_title
+ info['series title'] = title
+ info['title'] = "%s: %s" % (title, episode_title)
+ else:
+ info['title'] = title
+
+ # Series: season/episode numbers; 'series title'/'episode title' may be unset above, so fall back instead of raising KeyError
+ se = re.compile(r"Season (\d+), Episode (\d+)\)").findall(data)  # \d+ so int() never sees an empty match
+ if se:
+ info['season'] = int(se[0][0])
+ info['episode'] = int(se[0][1])
+ info['title'] = "%s (S%02dE%02d) %s" % (
+ info.get('series title', info['title']), info['season'], info['episode'], info.get('episode title', ''))
+ info['title'] = info['title'].strip()
#Rating
rating = findRe(data, '([\d\.]*?)/10')
@@ -131,17 +155,42 @@ def getMovieInfo(imdbId):
info['votes'] = int(votes.replace(',', ''))
else:
info['votes'] = -1
+
return info
+
def getMoviePoster(imdbId):
info = getMovieInfo(imdbId)
return info['poster']
def getMovieYear(imdbId):
+ '''
+ >>> getMovieYear('0315404')
+ u'1964'
+
+ >>> getMovieYear('0734840')
+ u'1990'
+
+ >>> getMovieYear('0815352')
+ u'1964'
+ '''
info = getMovieInfo(imdbId)
return info['year']
def getMovieTitle(imdbId):
+ '''
+ >>> getMovieTitle('0306414')
+ u'The Wire'
+
+ >>> getMovieTitle('0734840')
+ u'Twin Peaks (S01E02) Episode #1.2'
+
+ >>> getMovieTitle('0734840')
+ u'Twin Peaks (S01E02) Episode #1.2'
+
+ >>> getMovieTitle('0749451')
+ u'The Wire (S01E01) The Target'
+ '''
info = getMovieInfo(imdbId)
return info['title']
@@ -474,25 +523,6 @@ class IMDb:
parsed_value = value
return parsed_value
- def parseTitle(self):
- title = getMovieTitle(self.imdb)
- title = normalizeTitle(title)
- if title.startswith('"') and title.find('"',1) > 0 and \
- title.find('"',1) == title.rfind('"'):
- data = self.getPage()
- se = re.compile("Season (\d*), Episode (\d*)\)").findall(data)
- if se:
- se = se[0]
- se = ' (S%02dE%02d) ' % (int(se[0]), int(se[1]))
- title = normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:].strip()
- else:
- part2 = title[title.rfind('"')+1:]
- part2 = re.sub("[\d\?-]", "", part2).strip()
- title = normalizeTitle(title[1:title.rfind('"')])
- if part2:
- title += ':' + part2
- return normalizeTitle(title)
-
def parseYear(self):
year = ''
data = self.getPage()
@@ -520,7 +550,7 @@ class IMDb:
IMDbDict['poster'] = 'http://i.imdb.com/Heads/npa.gif'
#Title, Year
IMDbDict['year'] = self.parseYear()
- IMDbDict['title'] = self.parseTitle()
+ IMDbDict['title'] = getMovieTitle(self.imdb)
#Rating
m = re.compile('(.*?)/10', re.IGNORECASE).search(data)