diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index db4701f..b6fb00a 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -278,7 +278,7 @@ class IMDb: real_name = name[0] role_name = name[1] if role_name: - role_name = role_name.split('(')[0].replace('/ ...','').strip() + role_name = role_name.split('(')[0].replace('/ ...','') credits['cast'].append((stripTags(real_name), stripTags(role_name))) self.credits = credits return self.credits diff --git a/scrapeit/utils.py b/scrapeit/utils.py index 6bcc2ff..c4c0f16 100644 --- a/scrapeit/utils.py +++ b/scrapeit/utils.py @@ -125,9 +125,10 @@ def html_entity_decode(s, encoding = 'utf-8'): return u''.join(r) def stripTags(s): - return djangohtml.strip_tags(htmldecode(s)) - - + if s: + return djangohtml.strip_tags(htmldecode(s)).strip() + return u'' + from htmlentitydefs import name2codepoint # This pattern matches a character entity reference (a decimal numeric