diff --git a/ox/imdb.py b/ox/imdb.py
index bcb77e1..8fc9ade 100644
--- a/ox/imdb.py
+++ b/ox/imdb.py
@@ -4,7 +4,7 @@
from oxutils import *
import urllib2
-from urllib import quote
+from urllib import quote, unquote
import re, time
import os
import time
@@ -39,8 +39,10 @@ def getUrlBase(imdbId):
def getRawMovieData(imdbId):
imdbId = normalizeImdbId(imdbId)
data = dict()
- data['credits'] = parseCredits(imdbId)
- data['poster'] = findRegexp(data, 'name="poster".*?
int(s[1]):
+ stills.append("http://i.imdb.com/Photos/Ss/%s/%s.jpg" % (imdbId, s[2]))
+ if not stills:
+ s_ = re.compile('''
int(s[1]):
+ stills.append("http://%sf.jpg" % s[2])
+ return stills
+
'''the old code below'''
-def get_image(url):
- return getUrl(url)
-
-def _castList(data, regexp):
- soup = re.compile(regexp).findall(data)
- if soup:
- soup = BeautifulSoup(soup[0])
- names = []
- for i in soup('a', {'href': re.compile('/name/nm')}):
- if i.string:
- cast = stripTags(i.string)
- if cast not in names:
- names.append(cast)
- return names
- return []
-
class IMDb:
def __init__(self, imdbId):
self.imdb = imdbId
@@ -140,7 +163,6 @@ class IMDb:
self.triviaUrl = "%strivia" % self.pageUrl
self.locationUrl = "%slocations" % self.pageUrl
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
- self.trailerUrl = "%strailers" % self.pageUrl
def getPage(self):
return getUrlUnicode(self.pageUrl)
@@ -163,8 +185,10 @@ class IMDb:
parsed_value = int(parsed_value) * 60
elif key in ('country', 'language'):
parsed_value = value.split(' / ')
+ parsed_value = [v.strip() for v in parsed_value]
elif key == 'genre':
parsed_value = value.replace('more', '').strip().split(' / ')
+ parsed_value = [v.strip() for v in parsed_value]
elif key == 'tagline':
parsed_value = value.replace('more', '').strip()
elif key == 'plot_outline':
@@ -187,7 +211,7 @@ class IMDb:
if m:
parsed_value = m[0]
parsed_value = parsed_value.split('
')[-1].split('(')[0]
- director = self.parseCredits().get('director', None)
+ director = self.getCredits().get('director', None)
if director:
director = director[0]
parsed_value = parsed_value.replace(director, '')
@@ -200,34 +224,17 @@ class IMDb:
return parsed_value
def parseTitle(self):
- title = ''
- data = self.getPage()
- soup = BeautifulSoup(data)
- html_title = soup('div', {'id': 'tn15title'})
- if not html_title:
- html_title = soup('title')
- if html_title:
- html_title = str(html_title[0])
- html_title = html_title.replace('
', ' ').replace(' ', ' ')
- title = stripTags(html_title)
- title = re.sub('\(\d{4}\)', '', title)
- title = re.sub('\(\d{4}/I*\)', '', title)
- for t in ('TV series', 'TV-Series', 'TV mini-series', '(mini)', '(VG)', '(V)', '(TV)'):
- title = title.replace(t, '')
- if title.find(u'\xa0') > -1:
- title = title[:title.find(u'\xa0')]
- title = normalizeTitle(title.strip())
- if title.startswith('"') and title.endswith('"'):
- title = normalizeTitle(title[1:-1])
- elif title.startswith('"') and title.find('"',1) > 0 and \
- title.find('"',1) == title.rfind('"'):
- se = re.compile("Season (\d*), Episode (\d*)\)").findall(data)
- if se:
- se = se[0]
- se = ' (S%02dE%02d)' % (int(se[0]), int(se[1]))
- title = normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:]
- else:
- title = normalizeTitle(title[1:title.rfind('"')]) + ':' + title[title.rfind('"')+1:]
+ title = getTitle(self.imdb)
+ title = normalizeTitle(title)
+ if title.startswith('"') and title.find('"',1) > 0 and \
+ title.find('"',1) == title.rfind('"'):
+ se = re.compile("Season (\d*), Episode (\d*)\)").findall(self.getPage())  # search the title page text for season/episode numbers
+ if se:
+ se = se[0]
+ se = ' (S%02dE%02d)' % (int(se[0]), int(se[1]))
+ title = normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:]
+ else:
+ title = normalizeTitle(title[1:title.rfind('"')]) + ':' + title[title.rfind('"')+1:]
return normalizeTitle(title)
def parseYear(self):
@@ -252,7 +259,7 @@ class IMDb:
data = self.getPage()
IMDbDict ={}
#Poster
- IMDbDict['poster'] = findRegexp(data, 'name="poster".*?
title
title = title.split('-')[0]
@@ -574,21 +565,6 @@ def getEpisodeData(title, episode, show_url = None):
episodeData['imdb'] = i['episodes'][episode]['imdb']
return episodeData
-def getMovieStills(imdbId):
- data = getUrl("http://imdb.com/gallery/ss/%s" % imdbId)
- s_ = re.compile('''
int(s[1]):
- stills.append("http://i.imdb.com/Photos/Ss/%s/%s.jpg" % (imdbId, s[2]))
- if not stills:
- s_ = re.compile('''
int(s[1]):
- stills.append("http://%sf.jpg" % s[2])
- return stills
-
if __name__ == '__main__':
import sys
#print parse(sys.argv[1])