# Reconstructed from git patch c3874d9a680f750922227baefb97b4979e5b81af
# ("also parse Year and month year dates", ox/imdb.py, 2008-07-01).
# Shown here are the post-patch versions of the two definitions that the
# hunk contains in full.


def _parseDate(d):
    """Normalize a textual release date to ``YYYY-MM-DD``.

    Three layouts are tried, most specific first:

    * ``"%d %B %Y"`` (e.g. ``"1 July 2008"``) -> ``"2008-07-01"``
    * ``"%B %Y"``    (e.g. ``"July 2008"``)   -> ``"2008-07-01"``
    * ``"%Y"``       (e.g. ``"2008"``)        -> ``"2008-01-01"``

    Missing day and month components are filled in with ``01``.

    d -- the date text to parse.

    Returns the normalized string, or ``d`` itself, unchanged, when no
    layout matches (including when ``d`` is not a string at all).
    """
    # (strptime input layout, strftime output layout), tried in order.
    # %B is the locale's full month name, so matching depends on the
    # active locale.
    layouts = (
        ("%d %B %Y", '%Y-%m-%d'),
        ("%B %Y", '%Y-%m-01'),
        ("%Y", '%Y-01-01'),
    )
    for parse_format, output_format in layouts:
        try:
            return time.strftime(output_format, time.strptime(d, parse_format))
        except (ValueError, TypeError):
            # ValueError: the text does not fit this layout (or strftime
            # rejected the value); TypeError: d is not a string.  Only
            # these are caught so that KeyboardInterrupt/SystemExit are no
            # longer swallowed as they were by a bare ``except:``.
            continue
    return d


def getMovieReleaseDates(imdbId):
    """Collect the release-info rows for the title *imdbId*.

    Fetches ``<getUrlBase(imdbId)>/releaseinfo`` with getUrlUnicode and
    returns a list with one 3-tuple per match of ``regexp``:

    * group 1 with tags stripped and surrounding whitespace removed,
    * group 2 cleaned the same way and then passed through _parseDate
      (left as the raw text when it is not a recognized date layout),
    * group 3 cleaned the same way and then passed through decodeHtml.

    The list is empty when the pattern matches nothing.
    """
    url = "%s/releaseinfo" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    releasedates = []
    # NOTE(review): this pattern has no literal anchors; it looks as if the
    # HTML markup between the groups was lost before this text reached
    # review -- confirm against the real ox/imdb.py before relying on it.
    regexp = '''(.*?).*?(.*?).*?(.*?)'''

    for r in re.compile(regexp, re.DOTALL).findall(data):
        r_ = (stripTags(r[0]).strip(),
              _parseDate(stripTags(r[1]).strip()),
              decodeHtml(stripTags(r[2]).strip()))
        releasedates.append(r_)
    return releasedates