&apos; is not in name2codepoint, so decode it explicitly as well

This commit is contained in:
j 2012-04-24 19:00:48 +02:00
parent 6ed4a2d867
commit 5a00be4b37

View file

@ -139,6 +139,8 @@ def decodeHtml(html):
u'me & you and $&%'
>>> decodeHtml('&#x80;')
u'\u20ac'
>>> decodeHtml('Anniversary of Daoud&apos;s Republic')
u"Anniversary of Daoud's Republic"
"""
if type(html) != unicode:
html = unicode(html)[:]
@ -156,6 +158,8 @@ def decodeHtml(html):
return uchr(int(entity[1:]))
elif entity in name2codepoint:
return uchr(name2codepoint[entity])
elif entity == 'apos':
return "'"
else:
return match.group(0)
return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
@ -211,6 +215,8 @@ def parse_html(html, tags=None, wikilinks=False):
'<b>foo</b>'
>>> parse_html('<b>foo</b></b>')
'<b>foo</b>'
>>> parse_html('Anniversary of Daoud&apos;s Republic')
'Anniversary of Daoud&apos;s Republic'
'''
if not tags:
tags = [