diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py
index 7de45d7..dc198d3 100644
--- a/scrapeit/imdb.py
+++ b/scrapeit/imdb.py
@@ -6,6 +6,7 @@ import urllib2
from urllib import quote
import re, time
import os
+import time
from elementtree.ElementTree import parse, tostring
from BeautifulSoup import BeautifulSoup
@@ -331,9 +332,8 @@ class IMDb:
def parseEpisodes(self):
episodes = {}
- cdata = self.getEpisodes().replace('\r\n',' ')
- regexp = r'''
Season (.*?), Episode (.*?): (.*?)
.*?
(.*?)
'''
- #regexp = r'''Season (.*?), Episode (.*?): (.*?)
.*?
(.*?)'''
+ cdata = self.getEpisodes().replace('\r\n', ' ')
+ regexp = r'''Season (.*?), Episode (.*?): (.*?)
(.*?)
(.*?)
'''
reg = re.compile(regexp, re.IGNORECASE)
m = reg.findall(cdata)
for match in m:
@@ -344,9 +344,14 @@ class IMDb:
episodes[episode]['title'] = match[3].strip()
if episodes[episode]['title'].startswith('Episode #%d'%int(match[0])):
episodes[episode]['title'] = u''
- description = htmldecode(match[4])
+ description = htmldecode(match[5])
description = stripTags(description.split('Next US airings:')[0])
episodes[episode]['description'] = description
+ episodes[episode]['date'] = ''
+ d = stripTags(match[4])
+ d = d.replace('Original Air Date: ', '')
+ d = time.strftime("%Y-%m-%d", time.strptime(d, '%d %B %Y'))
+ episodes[episode]['date'] = d
except:
import traceback
print traceback.print_exc()