From e48e20ad6b79ab08a6b0bcb1c2c176f67f9986ac Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Wed, 29 Dec 2010 16:19:34 +0530
Subject: [PATCH] add new file
---
pandora/annotaion/utils.py | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 pandora/annotaion/utils.py
diff --git a/pandora/annotaion/utils.py b/pandora/annotaion/utils.py
new file mode 100644
index 0000000..133d874
--- /dev/null
+++ b/pandora/annotaion/utils.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+
+def html_parser(text, nofollow=True):
+    """Escape text for HTML, keeping <i>/<b> tags and rendering links.
+
+    http <a> tags, [url label] and [url] become anchors (rel="nofollow"
+    if nofollow is set); [/path label] becomes an in-page link; newlines
+    become <br />. NOTE(review): escape() and urlize() are not defined in
+    this file; presumably django.utils.html -- confirm and import them.
+    """
+    import re  # not imported anywhere in this module
+    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
+    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
+    # turn links into wiki links, make sure to only take http links
+    text = re.sub('<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
+    text = escape(text)
+    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
+    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')
+    nofollow_rel = ' rel="nofollow"' if nofollow else ''
+    # mask "http"/"." until after urlize(), presumably so it skips them
+    for t, link, txt in re.findall(r'(\[(http.*?) (.*?)\])', text):
+        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
+        ll = '<a href="%s"%s>%s</a>' % (link, nofollow_rel, txt)
+        text = text.replace(t, ll)
+    for t, link in re.findall(r'(\[(http.*?)\])', text):
+        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
+        ll = '<a href="%s"%s>%s</a>' % (link, nofollow_rel, link)
+        text = text.replace(t, ll)
+    text = urlize(text, nofollow=nofollow)
+    # in-page links
+    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)
+    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
+    text = text.replace("\n", '<br />')
+    return text
+