padma_migration/import_padma.py

#!/usr/bin/env python
from __future__ import division
import os
import sys
import hashlib
import re
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
os.chdir(root_dir)
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))
sys.path.insert(0, root_dir)
from django.core.management import setup_environ
try:
    import settings # Assumed to be in the same directory.
except ImportError:
    sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
    sys.exit(1)

settings.DEBUG = False
setup_environ(settings)
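
# Settings are configured via setup_environ() above; only after that is it
# safe to import the project's models below.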
from django.contrib.auth.models import User, Group
from datetime import datetime
from ox.utils import json
import ox
import monkey_patch.models
from item.models import Item, get_item
from annotation.models import Annotation
from archive.models import File
from urlalias.models import IDAlias, LayerAlias, ListAlias
from place.models import Place
from itemlist.models import List
from django.db import connection, transaction
from user.models import SessionData
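
# html_parser() below converts HTML stored in the old Pad.ma database into the
# markup the new site expects: <i>/<b> are kept, links are normalized, and
# everything else is escaped.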
def html_parser(text):
    text = text.strip()
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # turn links into wiki links, make sure to only take http links
    text = re.sub('<a .*?href="(http.*?)".*?>(.*?)</a>', '[\\1 \\2]', text)
    text = ox.escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')
    links = re.compile('(\[(http.*?) (.*?)\])').findall(text)
    for t, link, txt in links:
        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
        ll = '<a href="%s">%s</a>' % (link, txt)
        text = text.replace(t, ll)
    links = re.compile('(\[(http.*?)\])').findall(text)
    for t, link in links:
        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
        ll = '<a href="%s">%s</a>' % (link, link)
        text = text.replace(t, ll)
    text = ox.urlize(text, nofollow=False)
    # inpage links
    text = re.sub('\[(/.+?) (.+?)\]', '<a href="\\1">\\2</a>', text)
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
    text = text.replace('\n\n', '<br>\n').replace("\n", '<br>\n')
    return text
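
# Load the JSON dumps exported from the old Pad.ma site; they are expected to
# live in a padma/ directory next to this script.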
os.chdir(import_dir)
with open('padma/users.json') as f: users = json.load(f)
with open('padma/files.json') as f: padma = json.load(f)
with open('padma/locations.json') as f: locations = json.load(f)
with open('padma/lists.json') as f: lists = json.load(f)
with open('padma/data.json') as f: padma_data = json.load(f)
longest_username = max([len(u['username'].strip()) for u in users]) + 1
if longest_username > 255:
    print "usernames longer than 255 chars, extending table to", longest_username, '- fix in monkey_patch/models.py'
    cursor = connection.cursor()
    cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);' % longest_username)
    transaction.commit_unless_managed()
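
# Create or update a Django user for every old account, copy e-mail, password
# and join date from the dump, set the profile level, and seed SessionData so
# last-seen information survives the migration.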
print "import users"
for u in users:
    username = u['username'].strip()
    user, created = User.objects.get_or_create(username=username)
    user.email = u['email']
    if '@' not in user.email:
        user.email = ''
    user.password = u['password']
    user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ')
    user.save()
    profile = user.get_profile()
    if not user.email:
        profile.newsletter = False
    if 'admin' in u['groups']:
        profile.set_level('admin')
    else:
        profile.set_level('member')
    profile.save()
    if SessionData.objects.filter(user=user).count() == 0:
        s = SessionData()
        s.user = user
        s.session_key = hashlib.sha1(user.username).hexdigest()
        s.lastseen = user.date_joined
        s.firstseen = user.date_joined
        s.timesseen = 1
        s.save()
    for g in u['groups']:
        if g and g.strip() and g != 'admin':
            group, created = Group.objects.get_or_create(name=g)
            user.groups.add(group)
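
# Map old Padma field names (id, categories, source, collection, languages,
# description) to their new keys, normalize the director list and drop fields
# that are handled separately.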
def item_data(data):
    d = {}
    for key in data:
        value = data[key]
        if isinstance(value, basestring):
            value = value.replace('\r\n', '\n').strip()
        d[{
            u'id': u'oldId',
            u'categories': u'topic',
            u'source': u'project',
            u'collection': u'source',
            u'languages': u'language',
            u'description': u'summary',
        }.get(key, key)] = value
    if 'director' in d:
        d['director'] = unicode(d['director']).replace(' and ', ', ').strip().split(', ')
        d['director'] = filter(lambda x: x.strip().lower() not in ('none', 'n/a', '', 'various'),
                               d['director'])
    for key in ('layers', 'duration', 'size', 'public'):
        if key in d:
            del d[key]
    d['license'] = ['Pad.ma General Public License']
    return d
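
# Re-create all annotations of an item from the old layer records and keep a
# LayerAlias entry so old layer ids can still be resolved.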
def import_layers(item, layers):
    Annotation.objects.filter(item=item).delete()
    print "importing %d annotations" % len(layers)
    with transaction.commit_on_success():
        for layer in layers:
            oldLayerId = layer['id']
            layer_name = '%ss' % layer['track']
            layer_name = {
                'locations': 'places'
            }.get(layer_name, layer_name)
            annotation = Annotation(item=item, layer=layer_name)
            annotation.start = max(float(layer['time_in']) / 1000, 0)
            annotation.end = max(float(layer['time_out']) / 1000, 0)
            if annotation.end < annotation.start:
                annotation.end, annotation.start = annotation.start, annotation.end
            username = layer['creator'].strip()
            annotation.user = User.objects.get(username=username)
            annotation.value = html_parser(layer['value'])
            annotation.created = datetime.fromtimestamp(int(layer['created']))
            annotation.modified = datetime.fromtimestamp(int(layer['modified']))
            annotation.save()
            # migration alias
            alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
            alias.new = annotation.public_id
            alias.save()
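
# Main import: create an Item for every entry in the dump (oldest first), copy
# its metadata, groups and annotations, attach known files by oshash and
# record an IDAlias from the old id to the new itemId.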
for oldId in sorted(padma, key=lambda x: padma[x]['created']):
    item = get_item({
        'title': padma_data[oldId]['title']
    })
    print '\n', oldId, item.itemId
    data = padma_data[oldId]
    _data = item_data(data)
    username = _data.pop('creator').strip()
    item.user = User.objects.get(username=username)
    for key in _data:
        item.data[key] = _data[key]
    if 'collection' in data and data['collection']:
        group, created = Group.objects.get_or_create(name=data['collection'])
        item.groups.add(group)
    if 'poster_frame' in item.data:
        item.poster_frame = float(item.data.pop('poster_frame')) / 1000
    if 'published' in item.data:
        item.published = datetime.fromtimestamp(int(item.data.pop('published')))
    if 'created' in item.data:
        item.created = datetime.fromtimestamp(int(item.data.pop('created')))
    if 'modified' in item.data:
        item.modified = datetime.fromtimestamp(int(item.data.pop('modified')))
    item.level = not data.get('public', False) and 2 or 0
    item.save()
    item.make_poster(True)
    import_layers(item, data['layers'])
    # link file
    if oldId in padma:
        if padma[oldId]['oshash']:
            print 'add file', padma[oldId]['oshash']
            oshash = padma[oldId]['oshash']
            qs = File.objects.filter(oshash=oshash)
            if qs.count() == 0:
                f = File()
                f.oshash = oshash
            else:
                f = qs[0]
            f.item = item
            f.path = padma[oldId].get('file', '')
            f.save()
        if 'ogg_oshash' in padma[oldId]:
            print 'add file', padma[oldId]['ogg_oshash']
            oshash = padma[oldId]['ogg_oshash']
            qs = File.objects.filter(oshash=oshash)
            if qs.count() == 0:
                f = File()
                f.oshash = oshash
            else:
                f = qs[0]
            f.item = item
            f.path = padma[oldId].get('ogg', '')
            f.save()
    alias, created = IDAlias.objects.get_or_create(old=oldId)
    alias.new = item.itemId
    alias.save()
    print item, item.itemId
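
# Lists: static lists get their items resolved via IDAlias, everything else
# becomes a smart list with a single-condition query; a ListAlias maps the old
# list id to the new one.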
print "import lists"
for l in lists:
    l['user'] = User.objects.get(username=l['user'].strip())
    p, c = List.objects.get_or_create(name=l['title'], user=l['user'])
    p.type = l['type'] == 'static' and 'static' or 'smart'
    p.status = l['public'] and 'featured' or 'private'
    p.description = html_parser(l['description'])
    p.save()
    if l['type'] == 'static':
        for v in l['items']:
            try:
                itemId = IDAlias.objects.get(old=v).new
                i = Item.objects.get(itemId=itemId)
                p.add(i)
            except Item.DoesNotExist:
                print p.name, v
    else:
        key = l['query']['key']
        value = l['query']['value']
        if key == '':
            key = '*'
        p.query = {'conditions': [{'key': key, 'value': value, 'operator': '='}], 'operator': '&'}
    p.save()
    alias, created = ListAlias.objects.get_or_create(old=l['id'])
    alias.new = p.get_id()
    alias.save()
#Places
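# Import locations as Place objects; entries without a recorded creator are
# assigned to the first user (lowest id).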
print "import places"
for l in locations:
    oldId = l.pop('id')
    if 'user' in l:
        l['user'] = User.objects.get(username=l['user'].strip())
    else:
        l['user'] = User.objects.all().order_by('id')[0]
    l['name'] = ox.decode_html(l['name'])
    l['created'] = datetime.fromtimestamp(int(l['created']))
    l['modified'] = datetime.fromtimestamp(int(l['modified']))
    l['alternativeNames'] = tuple(l['alternativeNames'])
    l['geoname'] = l['name']
    l['type'] = 'feature'
    p, c = Place.objects.get_or_create(name=l['name'])
    for key in l:
        if key != 'annotations':
            setattr(p, key, l[key])
    p.save()

# FIXME: matches
# FIXME: update links in annotations