From 80722c0aca4f7a3715933cb9c46b0ff7671f8f67 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sun, 25 Dec 2011 18:25:02 +0530 Subject: [PATCH] format annotations --- import_padma.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/import_padma.py b/import_padma.py index eaf65a3..d939ce5 100755 --- a/import_padma.py +++ b/import_padma.py @@ -3,6 +3,7 @@ from __future__ import division import os import sys import hashlib +import re import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) root_dir = os.path.normpath(os.path.abspath(sys.argv[1])) @@ -33,6 +34,7 @@ import monkey_patch.models from item.models import Item, get_item from annotation.models import Annotation + from archive.models import File from urlalias.models import IDAlias, LayerAlias, ListAlias from place.models import Place @@ -41,6 +43,40 @@ from django.db import connection, transaction from user.models import SessionData +def html_parser(text): + text = text.strip() + text = text.replace('', '__i__').replace('', '__/i__') + text = text.replace('', '__b__').replace('', '__/b__') + #truns links into wiki links, make sure to only take http links + text = re.sub('(.*?)', '[\\1 \\2]', text) + text = ox.escape(text) + text = text.replace('__i__', '').replace('__/i__', '') + text = text.replace('__b__', '').replace('__/b__', '') + + links = re.compile('(\[(http.*?) (.*?)\])').findall(text) + for t, link, txt in links: + link = link.replace('http', '__LINK__').replace('.', '__DOT__') + ll = '%s' % (link, txt) + text = text.replace(t, ll) + links = re.compile('(\[(http.*?)\])').findall(text) + for t, link in links: + link = link.replace('http', '__LINK__').replace('.', '__DOT__') + ll = '%s' % (link, link) + text = text.replace(t, ll) + + text = ox.urlize(text, nofollow=False) + + #inpage links + text = re.sub('\[(/.+?) (.+?)\]', '\\2', text) + + text = text.replace('__LINK__', 'http').replace('__DOT__', '.') + text = text.replace("\n\n", '

\n') + text = text.replace("\n", '
\n').replace('


\n', '

\n') + text = u'

%s

'%text + return text + + + os.chdir(import_dir) with open('padma/users.json') as f: users = json.load(f) @@ -122,7 +158,7 @@ def import_layers(item, layers): annotation.end = float(layer['time_out'])/1000 username = layer['creator'].strip() annotation.user = User.objects.get(username=username) - annotation.value = layer['value'] + annotation.value = html_parser(layer['value'], False) annotation.created = datetime.fromtimestamp(int(layer['created'])) annotation.modified = datetime.fromtimestamp(int(layer['modified'])) annotation.save() @@ -232,4 +268,5 @@ for l in locations: p.save() #FIXME matches +#fixme update links in annotations