286 lines
10 KiB
Python
Executable file
286 lines
10 KiB
Python
Executable file
#!/usr/bin/env python
# Legacy Pad.ma -> pan.do/ra migration script (Python 2).
# Usage: import.py <root_dir>, where <root_dir> is the django project
# directory and <root_dir>/../bin/activate_this.py is a virtualenv.

from __future__ import division

import os
import sys
import hashlib
import re

# Directory this script lives in; the padma/*.json dumps are opened
# relative to it further down.
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# Django project root, taken from the first command-line argument.
root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
os.chdir(root_dir)

#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))

# Make the project importable (settings, item, annotation, ...).
sys.path.insert(0, root_dir)
|
|
|
|
# Bootstrap django the pre-1.4 way; project model imports below must
# come after setup_environ().
from django.core.management import setup_environ
try:
    import settings # Assumed to be in the same directory.
except ImportError:
    import sys
    sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
    sys.exit(1)
# Force DEBUG off for this long-running import — presumably to keep
# django from accumulating its per-query debug log; confirm if changed.
settings.DEBUG=False

setup_environ(settings)

from django.contrib.auth.models import User, Group
from datetime import datetime
from ox.utils import json
import ox
# NOTE(review): imported for side effects — looks like it patches model
# columns (see the auth_user ALTER below); confirm in monkey_patch/models.py.
import monkey_patch.models

from item.models import Item, get_item
from annotation.models import Annotation

from archive.models import File
from urlalias.models import IDAlias, LayerAlias, ListAlias
from place.models import Place
from itemlist.models import List
from django.db import connection, transaction
from user.models import SessionData
|
|
def html_parser(text):
    """Render legacy padma wiki-ish markup as HTML.

    Keeps <i>/<b> tags, escapes everything else, expands
    [url label], [url] and [/page label] links into anchors and
    converts newlines to <br> tags.
    """
    text = text.strip()
    # mask inline formatting tags so they survive ox.escape() below
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    #truns links into wiki links, make sure to only take http links
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
    text = ox.escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')

    def _mask(url):
        # hide the url from ox.urlize(); unmasked again at the end
        return url.replace('http', '__LINK__').replace('.', '__DOT__')

    # [url label] -> <a href="url">label</a>
    for whole, url, label in re.findall(r'(\[(http.*?) (.*?)\])', text):
        text = text.replace(whole, '<a href="%s">%s</a>' % (_mask(url), label))

    # [url] -> <a href="url">url</a> (masked url doubles as the label)
    for whole, url in re.findall(r'(\[(http.*?)\])', text):
        masked = _mask(url)
        text = text.replace(whole, '<a href="%s">%s</a>' % (masked, masked))

    text = ox.urlize(text, nofollow=False)

    #inpage links
    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)

    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')

    text = text.replace('\n\n', '<br>\n').replace('\n', '<br>\n')

    return text
|
|
|
|
|
|
|
|
# Fixtures live next to this script, not in the project root.
os.chdir(import_dir)

# Legacy padma dumps: accounts, file metadata, places, lists, item data.
with open('padma/users.json') as f: users = json.load(f)

with open('padma/files.json') as f: padma = json.load(f)

with open('padma/locations.json') as f: locations = json.load(f)

with open('padma/lists.json') as f: lists = json.load(f)

with open('padma/data.json') as f: padma_data = json.load(f)

# Widen auth_user.username if any legacy username would not fit;
# per the message below the permanent fix belongs in monkey_patch/models.py.
longest_username = max([len(u['username'].strip()) for u in users]) + 1
if longest_username > 255:
    print "longer usernames, extending table to", longest_username, 'fix in monkey_patch/models.py'
    cursor = connection.cursor()
    cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);'%longest_username)
    transaction.commit_unless_managed()
|
|
|
|
print "import users"
for u in users:
    username = u['username'].strip()
    user, created = User.objects.get_or_create(username=username)
    user.email = u['email']
    # discard addresses that cannot be valid
    if not '@' in user.email:
        user.email = ''
    # NOTE(review): password hash copied verbatim — assumes the legacy
    # format is compatible with this django's auth backend; confirm.
    user.password = u['password']
    user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ')
    user.save()
    profile = user.get_profile()
    # no email means the user can't receive the newsletter
    if not user.email:
        profile.newsletter = False
    if 'admin' in u['groups']:
        profile.set_level('admin')
    else:
        profile.set_level('member')
    profile.save()
    # Seed one session record per user so first/last-seen match the
    # original join date; key is just a deterministic digest.
    if SessionData.objects.filter(user=user).count() == 0:
        s = SessionData()
        s.user = user
        s.session_key = hashlib.sha1(user.username).hexdigest()
        s.lastseen = user.date_joined
        s.firstseen = user.date_joined
        s.timesseen = 1
        s.save()
    # every non-empty legacy group except 'admin' becomes an auth Group
    for g in u['groups']:
        if g and g.strip() and g != 'admin':
            group, created = Group.objects.get_or_create(name=g)
            user.groups.add(group)
|
|
|
|
def item_data(data):
    """Map a legacy padma item dict onto pan.do/ra item data.

    Renames legacy keys (id->oldId, categories->topic, source->project,
    collection->source, languages->language), normalizes string values,
    splits and cleans the 'director' credit, drops keys stored elsewhere
    ('layers', 'duration', 'size', 'public') and forces the Pad.ma
    license.  Returns a new dict; `data` itself is not modified.
    """
    # Hoisted out of the loop: the original rebuilt this literal for
    # every key.  Also dropped a redundant `if key in data` check that
    # was always true inside `for key in data`.
    key_map = {
        u'id': u'oldId',
        u'categories': u'topic',
        u'source': u'project',
        u'collection': u'source',
        u'languages': u'language',
    }
    d = {}
    for key in data:
        value = data[key]
        if isinstance(value, basestring):
            # normalize windows line endings and trim whitespace
            value = value.replace('\r\n', '\n').strip()
        d[key_map.get(key, key)] = value
    if 'director' in d:
        # "A and B, C" -> ['A', 'B', 'C'], dropping placeholder credits
        d['director'] = unicode(d['director']).replace(' and ', ', ').strip().split(', ')
        d['director'] = filter(lambda x: x.strip().lower() not in ('none', 'n/a', '', 'various'),
                               d['director'])
    # these live in dedicated model fields / tables, not in item.data
    for key in ('layers', 'duration', 'size', 'public'):
        if key in d:
            del d[key]
    d['license'] = ['Pad.ma General Public License']
    return d
|
|
|
|
def import_layers(item, layers):
    """Recreate `item`'s annotations from the legacy padma `layers`
    list, recording old->new ids in LayerAlias for URL redirects."""
    # wipe first so re-running the import does not duplicate annotations
    Annotation.objects.filter(item=item).delete()
    print "importing %d annotations" % len(layers)
    # one transaction per item keeps the inserts fast
    with transaction.commit_on_success():
        for layer in layers:
            oldLayerId = layer['id']
            # padma track names are singular, pan.do/ra layer names plural
            layer_name = '%ss'%layer['track']
            layer_name = {
                'locations': 'places'
            }.get(layer_name, layer_name)
            annotation = Annotation(item=item, layer=layer_name)
            # legacy times are in milliseconds
            annotation.start = float(layer['time_in'])/1000
            annotation.end = float(layer['time_out'])/1000
            # some legacy layers have in/out swapped
            if annotation.end < annotation.start:
                annotation.end, annotation.start = annotation.start, annotation.end
            username = layer['creator'].strip()
            annotation.user = User.objects.get(username=username)
            annotation.value = html_parser(layer['value'])
            annotation.created = datetime.fromtimestamp(int(layer['created']))
            annotation.modified = datetime.fromtimestamp(int(layer['modified']))
            annotation.save()
            #migration alias
            alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
            alias.new = annotation.public_id
            alias.save()
|
|
|
|
# Import items oldest-first so itemIds are assigned in creation order.
for oldId in sorted(padma, key=lambda x: padma[x]['created']):
    # find-or-create by title
    item = get_item({
        'title': padma_data[oldId]['title']
    })
    print '\n', oldId, item.itemId
    data = padma_data[oldId]
    _data = item_data(data)
    username = _data.pop('creator').strip()
    item.user = User.objects.get(username=username)
    for key in _data:
        item.data[key] = _data[key]
    # legacy collection doubles as an access group
    if 'collection' in data and data['collection']:
        group, created = Group.objects.get_or_create(name=data['collection'])
        item.groups.add(group)
    # promote timestamp-ish entries out of item.data into model fields;
    # poster_frame is in milliseconds, the rest are unix timestamps
    if 'poster_frame' in item.data:
        item.poster_frame = float(item.data.pop('poster_frame')) / 1000
    if 'published' in item.data:
        item.published = datetime.fromtimestamp(int(item.data.pop('published')))
    if 'created' in item.data:
        item.created = datetime.fromtimestamp(int(item.data.pop('created')))
    if 'modified' in item.data:
        item.modified = datetime.fromtimestamp(int(item.data.pop('modified')))
    # non-public items get level 2, public ones level 0
    item.level = not data.get('public', False) and 2 or 0
    item.save()
    item.make_poster(True)
    import_layers(item, data['layers'])
    #link file
    # NOTE(review): always true — oldId is iterated from padma's keys
    if oldId in padma:
        # primary video file, matched by oshash
        if padma[oldId]['oshash']:
            print 'add file', padma[oldId]['oshash']
            oshash = padma[oldId]['oshash']
            qs = File.objects.filter(oshash=oshash)
            if qs.count() == 0:
                f = File()
                f.oshash = oshash
            else:
                f = qs[0]
            f.item = item
            f.path = padma[oldId].get('file', '')
            f.save()
        # optional ogg transcode
        if 'ogg_oshash' in padma[oldId]:
            print 'add file', padma[oldId]['ogg_oshash']
            oshash = padma[oldId]['ogg_oshash']
            qs = File.objects.filter(oshash=oshash)
            if qs.count() == 0:
                f = File()
                f.oshash = oshash
            else:
                f = qs[0]
            f.item = item
            f.path = padma[oldId].get('ogg', '')
            f.save()
    # record old->new item id for URL redirects
    alias, created = IDAlias.objects.get_or_create(old=oldId)
    alias.new = item.itemId
    alias.save()
    print item, item.itemId
|
|
|
print "import lists"
for l in lists:
    l['user'] = User.objects.get(username=l['user'].strip())
    p,c = List.objects.get_or_create(name=l['title'], user=l['user'])
    p.type = l['type'] == 'static' and 'static' or 'smart'
    # public legacy lists become featured, others private
    p.status = l['public'] and 'featured' or 'private'
    p.description = html_parser(l['description'])
    p.save()
    if l['type'] == 'static':
        # static list: resolve each legacy item id through IDAlias
        # NOTE(review): IDAlias.DoesNotExist is not caught here — an
        # unknown legacy id would abort the import; confirm intended.
        for v in l['items']:
            try:
                itemId = IDAlias.objects.get(old=v).new
                i = Item.objects.get(itemId=itemId)
                p.add(i)
            except Item.DoesNotExist:
                # item vanished; log list name and legacy id, keep going
                print p.name, v
    else:
        # smart list: legacy query is a single key/value condition
        key = l['query']['key']
        value= l['query']['value']
        if key == '': key = '*'
        p.query = {'conditions': [{'key': key, 'value': value, 'operator': '='}], 'operator': '&'}
        p.save()
    # record old->new list id for URL redirects
    alias, created = ListAlias.objects.get_or_create(old=l['id'])
    alias.new = p.get_id()
    alias.save()
|
|
|
|
#Places
print "import places"
for l in locations:
    # NOTE(review): oldId is popped but never used — no PlaceAlias is
    # recorded, unlike items/lists/layers above; confirm intended.
    oldId = l.pop('id')
    if 'user' in l:
        l['user'] = User.objects.get(username=l['user'].strip())
    else:
        # no creator recorded: fall back to the first (lowest-id) account
        l['user'] = User.objects.all().order_by('id')[0]
    l['name'] = ox.decodeHtml(l['name'])
    l['created'] = datetime.fromtimestamp(int(l['created']))
    l['modified'] = datetime.fromtimestamp(int(l['modified']))
    l['alternativeNames'] = tuple(l['alternativeNames'])
    # geoname mirrors the display name; every legacy place becomes a city
    l['geoname'] = l['name']
    l['type'] = 'city'
    p, c = Place.objects.get_or_create(name=l['name'])
    # copy everything except the annotation references onto the model
    for key in l:
        if key != 'annotations':
            setattr(p, key, l[key])
    p.save()
    #FIXME matches

#fixme update links in annotations