year and title again
This commit is contained in:
parent
f666c4f61d
commit
ec63d9fb7c
1 changed files with 31 additions and 27 deletions
|
@ -147,7 +147,35 @@ class IMDb:
|
|||
print value
|
||||
parsed_value = value
|
||||
return parsed_value
|
||||
|
||||
|
||||
def parseTitle(self):
    """Return the title shown on the page, without its year marker.

    The page returned by ``self.getPage()`` is parsed with BeautifulSoup.
    The ``<div id="tn15title">`` element is used when present, otherwise
    the document's ``<title>`` element.  Markup is removed with
    ``stripTags`` and the parenthesised year is cut out of the text.

    Returns an empty string when neither element is found.
    """
    data = self.getPage()
    soup = BeautifulSoup(data)
    html_title = soup('div', {'id': 'tn15title'})
    if not html_title:
        html_title = soup('title')
    if not html_title:
        return ''
    title = stripTags(str(html_title[0]))
    # Drop "(1999)" and also the disambiguated form "(1999/I)"; the bare
    # four-digit pattern alone left the latter inside the title.
    title = re.sub(r'\(\d{4}(?:/[IVXLCDM]+)?\)', '', title)
    return title.strip()
|
||||
|
||||
def parseYear(self):
    """Return the year found in the page's title element as a string.

    The page returned by ``self.getPage()`` is parsed with BeautifulSoup.
    The ``<div id="tn15title">`` element is used when present, otherwise
    the document's ``<title>`` element.

    Returns an empty string when no title element or no four-digit
    number is found.
    """
    data = self.getPage()
    soup = BeautifulSoup(data)
    html_title = soup('div', {'id': 'tn15title'})
    if not html_title:
        html_title = soup('title')
    if not html_title:
        return ''
    html_title = str(html_title[0])
    # Prefer the number that opens a parenthesised group, e.g. "(1968)"
    # or "(<a href=...>1968</a>)"; any tags directly after the "(" are
    # skipped.  Taking the first four digits anywhere would return "2001"
    # for "2001: A Space Odyssey (1968)".
    in_parens = re.search(r'\((?:\s*<[^>]*>\s*)*(\d{4})', html_title)
    if in_parens:
        return in_parens.group(1)
    # Fall back to the first four-digit run anywhere in the markup.
    candidates = re.findall(r'\d{4}', html_title)
    if candidates:
        return candidates[0]
    return ''
|
||||
|
||||
def parse(self):
|
||||
data = self.getPage()
|
||||
IMDbDict ={}
|
||||
|
@ -156,32 +184,8 @@ class IMDb:
|
|||
if not IMDbDict['poster']:
|
||||
IMDbDict['poster'] = 'http://i.imdb.com/Heads/npa.gif'
|
||||
#Title, Year
|
||||
title = u''
|
||||
year = u''
|
||||
soup = BeautifulSoup(data)
|
||||
html_title = soup('div', {'id': 'tn15title'})
|
||||
if html_title: html_title = html_title[0]('h1')
|
||||
if html_title: html_title = html_title[0].contents
|
||||
if html_title:
|
||||
title = html_title[0]
|
||||
year = re.compile('(\d\d\d\d)').findall(str(html_title[1]))
|
||||
if year: year = year[0]
|
||||
else: year = ''
|
||||
IMDbDict['year'] = year
|
||||
IMDbDict['title'] = stripTags(title).strip()
|
||||
else:
|
||||
title = _getTerm(data, '<title>(.*?)</title>')
|
||||
m = re.compile('\((\d+)\)').findall(title)
|
||||
if m:
|
||||
year = m[0]
|
||||
else:
|
||||
year = title.split('(')[-1].split(')')[0].strip()
|
||||
title = title.split('(')[0].strip().decode('utf-8')
|
||||
IMDbDict['title'] = title
|
||||
IMDbDict['year'] = year
|
||||
IMDbDict['title'] = htmldecode(IMDbDict['title'])
|
||||
if IMDbDict['title'][0] == '"' and IMDbDict['title'][-1] == '"':
|
||||
IMDbDict['title'] = IMDbDict['title'][1:-1]
|
||||
IMDbDict['year'] = self.parseYear()
|
||||
IMDbDict['title'] = self.parseTitle()
|
||||
|
||||
#Rating
|
||||
m = re.compile('<b>(.*?)/10</b>', re.IGNORECASE).search(data)
|
||||
|
|
Loading…
Reference in a new issue