format annotations

This commit is contained in:
j 2011-12-25 18:25:02 +05:30
parent 7cd2690d56
commit 80722c0aca
1 changed file with 38 additions and 1 deletion

View File

@ -3,6 +3,7 @@ from __future__ import division
import os
import sys
import hashlib
import re
# Absolute, normalised directory that contains this script.
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# Absolute, normalised form of the path given as the first command-line argument.
root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
@ -33,6 +34,7 @@ import monkey_patch.models
from item.models import Item, get_item
from annotation.models import Annotation
from archive.models import File
from urlalias.models import IDAlias, LayerAlias, ListAlias
from place.models import Place
@ -41,6 +43,40 @@ from django.db import connection, transaction
from user.models import SessionData
def html_parser(text, nofollow=False):
    """Convert raw annotation markup into paragraph-wrapped HTML.

    Only <i>, <b> and links survive: the inline tags and http links are
    swapped for placeholders, the text is passed through ox.escape(),
    and the placeholders are then turned back into markup. Remaining bare
    URLs are handed to ox.urlize(), wiki-style in-page links of the form
    "[/path label]" become anchors, and newlines become <br>/<p> breaks.

    text     -- source string (must support .strip()/.replace()).
    nofollow -- forwarded to ox.urlize(). Defaults to False, the value that
                used to be hard-coded, so one-argument calls behave as
                before. NOTE(review): the call in import_layers passes a
                second positional False; it is assumed to mean this flag --
                confirm.

    Returns a unicode string of the form u'<p>...</p>'.
    """
    def mask(url):
        # Hide "http" and "." so ox.urlize() below does not link the URL
        # a second time; both are restored after urlize has run.
        return url.replace('http', '__LINK__').replace('.', '__DOT__')

    text = text.strip()
    # Park the allowed inline tags behind placeholders so they are not
    # affected by ox.escape() (presumably HTML-escaping -- verify in ox).
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # Turn links into wiki links; only hrefs starting with http are taken.
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
    text = ox.escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')

    # Wiki links with a label: [http://example.com label]
    for match, link, label in re.findall(r'(\[(http.*?) (.*?)\])', text):
        anchor = '<a href="%s">%s</a>' % (mask(link), label)
        text = text.replace(match, anchor)

    # Wiki links without a label: [http://example.com]
    for match, link in re.findall(r'(\[(http.*?)\])', text):
        masked = mask(link)
        anchor = '<a href="%s">%s</a>' % (masked, masked)
        text = text.replace(match, anchor)

    text = ox.urlize(text, nofollow=nofollow)

    # In-page links: [/some/path label]
    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')

    # Blank line -> paragraph break, single newline -> <br>. The last
    # replace undoes the <br> that the second step adds after each
    # paragraph break.
    text = text.replace("\n\n", '</p><p>\n')
    text = text.replace("\n", '<br>\n').replace('</p><p><br>\n', '</p><p>\n')
    text = u'<p>%s</p>' % text
    return text
# Switch to import_dir so the relative path below resolves against it.
os.chdir(import_dir)
# Parse padma/users.json into `users`.
with open('padma/users.json') as f: users = json.load(f)
@ -122,7 +158,7 @@ def import_layers(item, layers):
annotation.end = float(layer['time_out'])/1000
username = layer['creator'].strip()
annotation.user = User.objects.get(username=username)
annotation.value = layer['value']
annotation.value = html_parser(layer['value'], False)
annotation.created = datetime.fromtimestamp(int(layer['created']))
annotation.modified = datetime.fromtimestamp(int(layer['modified']))
annotation.save()
@ -232,4 +268,5 @@ for l in locations:
p.save()
#FIXME matches
#FIXME update links in annotations