parse Original Air Date
This commit is contained in:
parent
3e91998dfb
commit
0cf8d6f330
1 changed file with 9 additions and 4 deletions
|
@ -6,6 +6,7 @@ import urllib2
|
|||
from urllib import quote
|
||||
import re, time
|
||||
import os
|
||||
import time
|
||||
|
||||
from elementtree.ElementTree import parse, tostring
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
@ -331,9 +332,8 @@ class IMDb:
|
|||
|
||||
def parseEpisodes(self):
|
||||
episodes = {}
|
||||
cdata = self.getEpisodes().replace('\r\n',' ')
|
||||
regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>.*?</b><br>(.*?)<br/>'''
|
||||
#regexp = r'''Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></span><br>.*?<br>(.*?)</td>'''
|
||||
cdata = self.getEpisodes().replace('\r\n', ' ')
|
||||
regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>(.*?)</b><br>(.*?)<br/>'''
|
||||
reg = re.compile(regexp, re.IGNORECASE)
|
||||
m = reg.findall(cdata)
|
||||
for match in m:
|
||||
|
@ -344,9 +344,14 @@ class IMDb:
|
|||
episodes[episode]['title'] = match[3].strip()
|
||||
if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])):
|
||||
episodes[episode]['title'] = u''
|
||||
description = htmldecode(match[4])
|
||||
description = htmldecode(match[5])
|
||||
description = stripTags(description.split('Next US airings:')[0])
|
||||
episodes[episode]['description'] = description
|
||||
episodes[episode]['date'] = ''
|
||||
d = stripTags(match[4])
|
||||
d = d.replace('Original Air Date: ', '')
|
||||
d = time.strftime("%Y-%m-%d", time.strptime(d, '%d %B %Y'))
|
||||
episodes[episode]['date'] = d
|
||||
except:
|
||||
import traceback
|
||||
print traceback.print_exc()
|
||||
|
|
Loading…
Reference in a new issue