From 35e8958efdeca0e3640688d93016471959c51ab7 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 30 Jul 2007 11:53:55 +0000 Subject: [PATCH] strip spaces after removing tags --- scrapeit/imdb.py | 2 +- scrapeit/utils.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py index db4701f..b6fb00a 100644 --- a/scrapeit/imdb.py +++ b/scrapeit/imdb.py @@ -278,7 +278,7 @@ class IMDb: real_name = name[0] role_name = name[1] if role_name: - role_name = role_name.split('(')[0].replace('/ ...','').strip() + role_name = role_name.split('(')[0].replace('/ ...','') credits['cast'].append((stripTags(real_name), stripTags(role_name))) self.credits = credits return self.credits diff --git a/scrapeit/utils.py b/scrapeit/utils.py index 6bcc2ff..c4c0f16 100644 --- a/scrapeit/utils.py +++ b/scrapeit/utils.py @@ -125,9 +125,10 @@ def html_entity_decode(s, encoding = 'utf-8'): return u''.join(r) def stripTags(s): - return djangohtml.strip_tags(htmldecode(s)) - - + if s: + return djangohtml.strip_tags(htmldecode(s)).strip() + return u'' + from htmlentitydefs import name2codepoint # This pattern matches a character entity reference (a decimal numeric