From 99554cb461ba26a57815b2e154bcc0df08abcf3c Mon Sep 17 00:00:00 2001 From: j Date: Wed, 20 Mar 2024 12:55:14 +0100 Subject: [PATCH] fix add_link --- ox/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ox/html.py b/ox/html.py index 28dcec1..f7ca816 100644 --- a/ox/html.py +++ b/ox/html.py @@ -16,7 +16,7 @@ TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;', "'", '"'] DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;'] unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') -word_split_re = re.compile(r'(\s+)') +word_split_re = re.compile(r'(\s+|<br>)') punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % ( '|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) @@ -393,7 +393,7 @@ def sanitize_html(html, tags=None, global_attributes=[]): parts[i] = escape_html(decode_html(part)) html = ''.join(parts) html = add_links(html) - html = html.replace('\n\n', '<br/><br/>') + html = html.replace('\n\n', '<br><br>') return sanitize_fragment(html) def split_tags(string):