Also parse year-only and month-year release dates
This commit is contained in:
parent
4c14ce613d
commit
c3874d9a68
1 changed files with 21 additions and 9 deletions
30
ox/imdb.py
30
ox/imdb.py
|
@ -313,23 +313,35 @@ def getMovieReleaseDate(imdbId):
|
|||
first_release = r[1]
|
||||
return first_release
|
||||
|
||||
def _parseDate(d):
|
||||
try:
|
||||
parsed_date = time.strptime(d, "%d %B %Y")
|
||||
parsed_date = time.strftime('%Y-%m-%d', parsed_date)
|
||||
return parsed_date
|
||||
except:
|
||||
try:
|
||||
parsed_date = time.strptime(d, "%B %Y")
|
||||
parsed_date = time.strftime('%Y-%m-01', parsed_date)
|
||||
return parsed_date
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
parsed_date = time.strptime(d, "%Y")
|
||||
parsed_date = time.strftime('%Y-01-01', parsed_date)
|
||||
return parsed_date
|
||||
except:
|
||||
pass
|
||||
return d
|
||||
|
||||
def getMovieReleaseDates(imdbId):
    """Return the release-info rows scraped from a title's releaseinfo page.

    Fetches ``<url base>/releaseinfo`` for ``imdbId`` and extracts every
    table row matching the three-cell pattern below.

    Returns a list of 3-tuples, one per matched row:

    * first cell, tags stripped and whitespace trimmed
      (presumably the country -- confirm against the page markup),
    * second cell passed through ``_parseDate`` so that recognised dates
      come back as ``YYYY-MM-DD`` and anything else is left as scraped,
    * third cell, tags stripped, trimmed and HTML-decoded
      (presumably remarks -- confirm against the page markup).
    """
    url = "%s/releaseinfo" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    releasedates = []
    regexp = '''<tr><td>(.*?)</td>.*?<td align="right">(.*?)</td>.*?<td>(.*?)</td></tr>'''

    # re.DOTALL lets a single table row span several lines of markup.
    for r in re.compile(regexp, re.DOTALL).findall(data):
        r_ = (stripTags(r[0]).strip(),
              _parseDate(stripTags(r[1]).strip()),
              decodeHtml(stripTags(r[2]).strip()))
        releasedates.append(r_)
    return releasedates
|
||||
|
|
Loading…
Reference in a new issue