'apos' is not in name2codepoint, so decode &apos; explicitly as well
This commit is contained in:
parent
6ed4a2d867
commit
5a00be4b37
1 changed files with 7 additions and 1 deletions
|
@ -139,6 +139,8 @@ def decodeHtml(html):
|
|||
u'me & you and $&%'
|
||||
>>> decodeHtml('€')
|
||||
u'€'
|
||||
>>> decodeHtml('Anniversary of Daoud&apos;s Republic')
|
||||
u"Anniversary of Daoud's Republic"
|
||||
"""
|
||||
if type(html) != unicode:
|
||||
html = unicode(html)[:]
|
||||
|
@ -156,6 +158,8 @@ def decodeHtml(html):
|
|||
return uchr(int(entity[1:]))
|
||||
elif entity in name2codepoint:
|
||||
return uchr(name2codepoint[entity])
|
||||
elif entity == 'apos':
|
||||
return "'"
|
||||
else:
|
||||
return match.group(0)
|
||||
return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
|
||||
|
@ -211,6 +215,8 @@ def parse_html(html, tags=None, wikilinks=False):
|
|||
'<b>foo</b>'
|
||||
>>> parse_html('<b>foo</b></b>')
|
||||
'<b>foo</b>'
|
||||
>>> parse_html('Anniversary of Daoud&apos;s Republic')
|
||||
"Anniversary of Daoud's Republic"
|
||||
'''
|
||||
if not tags:
|
||||
tags = [
|
||||
|
|
Loading…
Reference in a new issue