parse Original Air Date
This commit is contained in:
parent
3e91998dfb
commit
0cf8d6f330
1 changed file with 9 additions and 4 deletions
|
@@ -6,6 +6,7 @@ import urllib2
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
import re, time
|
import re, time
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
from elementtree.ElementTree import parse, tostring
|
from elementtree.ElementTree import parse, tostring
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
@@ -332,8 +333,7 @@ class IMDb:
|
||||||
def parseEpisodes(self):
|
def parseEpisodes(self):
|
||||||
episodes = {}
|
episodes = {}
|
||||||
cdata = self.getEpisodes().replace('\r\n', ' ')
|
cdata = self.getEpisodes().replace('\r\n', ' ')
|
||||||
regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>.*?</b><br>(.*?)<br/>'''
|
regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>(.*?)</b><br>(.*?)<br/>'''
|
||||||
#regexp = r'''Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></span><br>.*?<br>(.*?)</td>'''
|
|
||||||
reg = re.compile(regexp, re.IGNORECASE)
|
reg = re.compile(regexp, re.IGNORECASE)
|
||||||
m = reg.findall(cdata)
|
m = reg.findall(cdata)
|
||||||
for match in m:
|
for match in m:
|
||||||
|
@@ -344,9 +344,14 @@ class IMDb:
|
||||||
episodes[episode]['title'] = match[3].strip()
|
episodes[episode]['title'] = match[3].strip()
|
||||||
if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])):
|
if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])):
|
||||||
episodes[episode]['title'] = u''
|
episodes[episode]['title'] = u''
|
||||||
description = htmldecode(match[4])
|
description = htmldecode(match[5])
|
||||||
description = stripTags(description.split('Next US airings:')[0])
|
description = stripTags(description.split('Next US airings:')[0])
|
||||||
episodes[episode]['description'] = description
|
episodes[episode]['description'] = description
|
||||||
|
episodes[episode]['date'] = ''
|
||||||
|
d = stripTags(match[4])
|
||||||
|
d = d.replace('Original Air Date: ', '')
|
||||||
|
d = time.strftime("%Y-%m-%d", time.strptime(d, '%d %B %Y'))
|
||||||
|
episodes[episode]['date'] = d
|
||||||
except:
|
except:
|
||||||
import traceback
|
import traceback
|
||||||
print traceback.print_exc()
|
print traceback.print_exc()
|
||||||
|
|
Loading…
Reference in a new issue