# -*- coding: utf-8 -*-
# ci:si:et:sw=4:sts=4:ts=4
import re

# NOTE(review): ``escape`` and ``urlize`` are called by html_parser but are
# not imported anywhere in this file; they presumably come from
# django.utils.html (or ox.html) -- confirm and add the import, otherwise
# html_parser raises NameError on first use.


def _mask_url(url):
    """Replace 'http' and '.' in *url* with placeholders.

    html_parser undoes this masking after it has called urlize, presumably so
    that urlize does not link URLs that already sit inside an anchor.
    """
    return url.replace('http', '__LINK__').replace('.', '__DOT__')


def html_parser(text, nofollow=True):
    """Turn lightly marked-up text into escaped HTML.

    Processing order:

    1. ``<i>``/``<b>`` tags (opening and closing) are swapped for
       placeholders so they survive escaping and are restored afterwards.
    2. ``<a>`` tags whose href starts with ``http`` are rewritten as
       wiki-style links ``[url label]``.
    3. The whole text is passed through ``escape``.
    4. Wiki-style links ``[http... label]`` and ``[http...]`` become
       ``<a>`` tags.
    5. The text is passed through ``urlize``.
    6. In-page links ``[/path label]`` become ``<a>`` tags.
    7. Newlines become ``<br />``.

    :param text: input string to convert.
    :param nofollow: when true, generated http links carry
        ``rel="nofollow"``; the flag is also forwarded to ``urlize``.
    :returns: the converted HTML string.
    """
    # Protect the whitelisted inline tags from escape().
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # Turn links into wiki links; make sure to only take http links.
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
    text = escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')

    nofollow_rel = ' rel="nofollow"' if nofollow else ''

    # Wiki links with a label: [http://example.com label]
    for whole, link, label in re.findall(r'(\[(http.*?) (.*?)\])', text):
        link = _mask_url(link)
        anchor = '<a href="%s"%s>%s</a>' % (link, nofollow_rel, label)
        text = text.replace(whole, anchor)
    # Wiki links without a label: [http://example.com]
    # Links handled above no longer contain 'http', so they are not matched.
    for whole, link in re.findall(r'(\[(http.*?)\])', text):
        link = _mask_url(link)
        anchor = '<a href="%s"%s>%s</a>' % (link, nofollow_rel, link)
        text = text.replace(whole, anchor)

    text = urlize(text, nofollow=nofollow)

    # In-page links: [/some/path label]
    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)

    # Undo the URL masking applied by _mask_url.
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
    text = text.replace("\n", '<br />')
    return text