format annotations

This commit is contained in:
j 2011-12-25 18:25:02 +05:30
parent 7cd2690d56
commit 80722c0aca

View file

@ -3,6 +3,7 @@ from __future__ import division
import os import os
import sys import sys
import hashlib import hashlib
import re
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
root_dir = os.path.normpath(os.path.abspath(sys.argv[1])) root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
@ -33,6 +34,7 @@ import monkey_patch.models
from item.models import Item, get_item from item.models import Item, get_item
from annotation.models import Annotation from annotation.models import Annotation
from archive.models import File from archive.models import File
from urlalias.models import IDAlias, LayerAlias, ListAlias from urlalias.models import IDAlias, LayerAlias, ListAlias
from place.models import Place from place.models import Place
@ -41,6 +43,40 @@ from django.db import connection, transaction
from user.models import SessionData from user.models import SessionData
def html_parser(text, nofollow=False):
    """Convert imported annotation markup into escaped, paragraph-wrapped HTML.

    Processing order matters and is kept exactly as before:

    1. ``<i>``/``<b>`` tags are swapped for placeholders so they survive
       escaping, and ``<a href="http...">`` anchors are rewritten to the
       wiki form ``[url label]``.
    2. The text is passed through ``ox.escape`` and the ``<i>``/``<b>``
       placeholders are turned back into tags.
    3. Wiki links ``[http... label]`` and ``[http...]`` become anchors whose
       URL is temporarily masked, then ``ox.urlize`` is applied to the rest.
    4. In-page wiki links ``[/path label]`` become anchors, the masked URLs
       are restored, and newlines are converted to ``<p>``/``<br>`` markup.

    :param text: raw annotation value (a string).
    :param nofollow: forwarded to ``ox.urlize``. Defaults to ``False``, the
        value that used to be hard-coded, so one-argument calls are
        unchanged.
        NOTE(review): ``import_layers`` calls this function with a second
        positional ``False``; that argument is assumed to be this flag --
        confirm.
    :returns: a unicode string of the form ``<p>...</p>``.
    """
    def mask_url(url):
        # Hide the scheme and the dots behind placeholders; they are put
        # back after ox.urlize has run (presumably so urlize does not link
        # these URLs a second time -- confirm against ox.urlize).
        return url.replace('http', '__LINK__').replace('.', '__DOT__')

    text = text.strip()

    # Swap the allowed inline tags for placeholders before escaping.
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # Turn links into wiki links; only http links are taken.
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)

    text = ox.escape(text)

    # Bring the allowed inline tags back.
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')

    # Wiki links with a label: [http://example.com label]
    for wiki, link, label in re.findall(r'(\[(http.*?) (.*?)\])', text):
        anchor = '<a href="%s">%s</a>' % (mask_url(link), label)
        text = text.replace(wiki, anchor)

    # Wiki links without a label: [http://example.com]
    for wiki, link in re.findall(r'(\[(http.*?)\])', text):
        masked = mask_url(link)
        anchor = '<a href="%s">%s</a>' % (masked, masked)
        text = text.replace(wiki, anchor)

    text = ox.urlize(text, nofollow=nofollow)

    # In-page links: [/path label]
    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)

    # Restore the URLs masked above.
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')

    # Blank lines separate paragraphs; remaining newlines become <br>.
    text = text.replace("\n\n", '</p><p>\n')
    text = text.replace("\n", '<br>\n').replace('</p><p><br>\n', '</p><p>\n')
    text = u'<p>%s</p>' % text
    return text
os.chdir(import_dir) os.chdir(import_dir)
with open('padma/users.json') as f: users = json.load(f) with open('padma/users.json') as f: users = json.load(f)
@ -122,7 +158,7 @@ def import_layers(item, layers):
annotation.end = float(layer['time_out'])/1000 annotation.end = float(layer['time_out'])/1000
username = layer['creator'].strip() username = layer['creator'].strip()
annotation.user = User.objects.get(username=username) annotation.user = User.objects.get(username=username)
annotation.value = layer['value'] annotation.value = html_parser(layer['value'], False)
annotation.created = datetime.fromtimestamp(int(layer['created'])) annotation.created = datetime.fromtimestamp(int(layer['created']))
annotation.modified = datetime.fromtimestamp(int(layer['modified'])) annotation.modified = datetime.fromtimestamp(int(layer['modified']))
annotation.save() annotation.save()
@ -232,4 +268,5 @@ for l in locations:
p.save() p.save()
#FIXME matches #FIXME matches
#fixme update links in annotations