From 80722c0aca4f7a3715933cb9c46b0ff7671f8f67 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sun, 25 Dec 2011 18:25:02 +0530
Subject: [PATCH] format annotations

---
 import_padma.py | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/import_padma.py b/import_padma.py
index eaf65a3..d939ce5 100755
--- a/import_padma.py
+++ b/import_padma.py
@@ -3,6 +3,7 @@ from __future__ import division
 import os
 import sys
 import hashlib
+import re
 
 import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
 root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
@@ -33,6 +34,7 @@ import monkey_patch.models
 
 from item.models import Item, get_item
 from annotation.models import Annotation
+
 from archive.models import File
 from urlalias.models import IDAlias, LayerAlias, ListAlias
 from place.models import Place
@@ -41,6 +43,40 @@ from django.db import connection, transaction
 from user.models import SessionData
 
 
+def html_parser(text, nofollow=False):
+    """Return annotation markup converted to HTML, wrapped in <p>...</p>.
+
+    <i>, <b> and <a href="http..."> in text are kept across ox.escape();
+    [http://url label], [http://url] and [/path label] become <a> tags,
+    blank lines become paragraph breaks and single newlines <br>.
+    nofollow is passed on to ox.urlize(); it defaults to the value that was
+    hard-coded before, so html_parser(v, False) no longer raises TypeError.
+    """
+    text = text.strip()
+    tags = ('i', '/i', 'b', '/b')  # placeholders protect these from ox.escape()
+    for tag in tags:
+        text = text.replace('<%s>' % tag, '__%s__' % tag)
+    # turns links into wiki links, make sure to only take http links
+    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
+    text = ox.escape(text)
+    for tag in tags:
+        text = text.replace('__%s__' % tag, '<%s>' % tag)
+    # mask link targets (http, dots) until after ox.urlize(); unmasked below
+    for pattern in (r'(\[(http.*?) (.*?)\])', r'(\[(http.*?)\])'):
+        for match in re.findall(pattern, text):
+            link = match[1].replace('http', '__LINK__').replace('.', '__DOT__')
+            label = match[2] if len(match) > 2 else link
+            text = text.replace(match[0], '<a href="%s">%s</a>' % (link, label))
+    text = ox.urlize(text, nofollow=nofollow)
+    # inpage links
+    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)
+    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
+    text = text.replace("\n\n", '</p><p>\n')
+    text = text.replace("\n", '<br>\n').replace('</p><p><br>\n', '</p><p>\n')
+    return u'<p>%s</p>' % text
+
+
+
 os.chdir(import_dir)
 
 with open('padma/users.json') as f: users = json.load(f)
@@ -122,7 +158,7 @@ def import_layers(item, layers):
             annotation.end = float(layer['time_out'])/1000
             username = layer['creator'].strip()
             annotation.user = User.objects.get(username=username)
-            annotation.value = layer['value']
+            annotation.value = html_parser(layer['value'], False)
             annotation.created = datetime.fromtimestamp(int(layer['created']))
             annotation.modified = datetime.fromtimestamp(int(layer['modified']))
             annotation.save()
@@ -232,4 +268,5 @@ for l in locations:
     p.save()
     #FIXME matches
 
+#fixme update links in annotations