# padma_migration/import_padma.py
# One-off script: imports legacy Pad.ma data (users, items, layers, lists,
# places) into a pan.do/ra installation. Run as: import_padma.py <root_dir>
#!/usr/bin/env python
from __future__ import division
import os
import sys
2011-12-05 13:49:34 +00:00
import hashlib
2011-12-25 12:55:02 +00:00
import re
2011-12-04 13:33:48 +00:00
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
os.chdir(root_dir)
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))
sys.path.insert(0, root_dir)
from django.core.management import setup_environ
try:
import settings # Assumed to be in the same directory.
except ImportError:
import sys
sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
sys.exit(1)
settings.DEBUG=False
setup_environ(settings)
from django.contrib.auth.models import User, Group
from datetime import datetime
from ox.utils import json
import ox
2011-12-05 13:49:34 +00:00
import monkey_patch.models
from item.models import Item, get_item
2011-12-04 17:05:48 +00:00
from annotation.models import Annotation
2011-12-25 12:55:02 +00:00
2011-12-04 13:33:48 +00:00
from archive.models import File
2011-12-04 17:05:48 +00:00
from urlalias.models import IDAlias, LayerAlias, ListAlias
from place.models import Place
from itemlist.models import List
2011-12-04 13:33:48 +00:00
from django.db import connection, transaction
2011-12-05 13:49:34 +00:00
from user.models import SessionData
2011-12-04 13:33:48 +00:00
2011-12-25 12:55:02 +00:00
def html_parser(text):
    """Convert legacy padma HTML/wiki markup to sanitized pan.do/ra HTML.

    Keeps <i>/<b> tags, converts ``[url text]`` / ``[url]`` wiki links and
    ``<a href="http...">`` links to anchors, escapes everything else and
    turns newlines into <br>.
    """
    text = text.strip()
    # protect the allowed inline tags from ox.escape() below
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # turns links into wiki links, make sure to only take http links
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', '[\\1 \\2]', text)
    text = ox.escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')
    # [url text] wiki links
    links = re.compile(r'(\[(http.*?) (.*?)\])').findall(text)
    for t, link, txt in links:
        # mask the url so ox.urlize() below does not link it a second time
        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
        ll = '<a href="%s">%s</a>' % (link, txt)
        text = text.replace(t, ll)
    # [url] wiki links
    links = re.compile(r'(\[(http.*?)\])').findall(text)
    for t, link in links:
        link = link.replace('http', '__LINK__').replace('.', '__DOT__')
        ll = '<a href="%s">%s</a>' % (link, link)
        text = text.replace(t, ll)
    text = ox.urlize(text, nofollow=False)
    # inpage links
    text = re.sub(r'\[(/.+?) (.+?)\]', '<a href="\\1">\\2</a>', text)
    # unmask the urls linked above
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
    text = text.replace('\n\n', '<br>\n').replace("\n", '<br>\n')
    return text
os.chdir(import_dir)
2011-12-05 13:49:34 +00:00
with open('padma/users.json') as f: users = json.load(f)
2011-12-04 13:33:48 +00:00
2011-12-05 13:49:34 +00:00
with open('padma/files.json') as f: padma = json.load(f)
2011-12-04 13:33:48 +00:00
2011-12-05 13:49:34 +00:00
with open('padma/locations.json') as f: locations = json.load(f)
2011-12-04 17:05:48 +00:00
2011-12-05 13:49:34 +00:00
with open('padma/lists.json') as f: lists = json.load(f)
2011-12-04 17:05:48 +00:00
2011-12-05 13:49:34 +00:00
with open('padma/data.json') as f: padma_data = json.load(f)
2011-12-04 13:33:48 +00:00
longest_username = max([len(u['username'].strip()) for u in users]) + 1
if longest_username > 255:
print "longer usernames, extending table to", longest_username, 'fix in monkey_patch/models.py'
cursor = connection.cursor()
cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);'%longest_username)
transaction.commit_unless_managed()
2011-12-05 13:49:34 +00:00
print "import users"
2011-12-04 13:33:48 +00:00
for u in users:
2011-12-18 09:49:28 +00:00
username = u['username'].strip()
2011-12-04 13:33:48 +00:00
user, created = User.objects.get_or_create(username=username)
user.email = u['email']
if not '@' in user.email:
user.email = ''
2011-12-04 13:33:48 +00:00
user.password = u['password']
user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ')
user.save()
profile = user.get_profile()
2011-12-18 09:49:28 +00:00
if not user.email:
profile.newsletter = False
2011-12-05 13:49:34 +00:00
if 'admin' in u['groups']:
2011-12-04 13:33:48 +00:00
profile.set_level('admin')
else:
profile.set_level('member')
profile.save()
2011-12-05 13:49:34 +00:00
if SessionData.objects.filter(user=user).count() == 0:
s = SessionData()
s.user = user
s.session_key = hashlib.sha1(user.username).hexdigest()
s.lastseen = user.date_joined
s.firstseen = user.date_joined
2011-12-05 13:49:34 +00:00
s.timesseen = 1
s.save()
2011-12-04 13:33:48 +00:00
for g in u['groups']:
2011-12-04 17:05:48 +00:00
if g and g.strip() and g != 'admin':
2011-12-04 13:33:48 +00:00
group, created = Group.objects.get_or_create(name=g)
user.groups.add(group)
# legacy padma field name -> pan.do/ra field name
ITEM_KEY_MAP = {
    u'id': u'oldId',
    u'categories': u'topic',
    u'source': u'project',
    u'collection': u'source',
    u'languages': u'language',
    u'description': u'summary',
}

def item_data(data):
    """Map a legacy padma item dict onto pan.do/ra item data.

    Renames keys per ITEM_KEY_MAP, normalizes newlines/whitespace in string
    values, splits 'director' into a cleaned list, drops keys that are
    imported via dedicated code paths and forces the Pad.ma license.
    """
    d = {}
    for key, value in data.iteritems():
        if isinstance(value, basestring):
            value = value.replace('\r\n', '\n').strip()
        d[ITEM_KEY_MAP.get(key, key)] = value
    if 'director' in d:
        # "A and B, C" -> ['A', 'B', 'C'], dropping placeholder entries
        d['director'] = unicode(d['director']).replace(' and ', ', ').strip().split(', ')
        d['director'] = filter(lambda x: x.strip().lower() not in ('none', 'n/a', '', 'various'),
                               d['director'])
    # these are handled elsewhere (layers/files/level), not as item data
    for key in ('layers', 'duration', 'size', 'public'):
        if key in d:
            del d[key]
    d['license'] = ['Pad.ma General Public License']
    return d
def import_layers(item, layers):
Annotation.objects.filter(item=item).delete()
print "importing %d annotations" % len(layers)
2011-12-05 13:49:34 +00:00
with transaction.commit_on_success():
for layer in layers:
oldLayerId = layer['id']
layer_name = '%ss'%layer['track']
layer_name = {
'locations': 'places'
}.get(layer_name, layer_name)
2011-12-05 13:49:34 +00:00
annotation = Annotation(item=item, layer=layer_name)
2012-02-16 09:29:00 +00:00
annotation.start = max(float(layer['time_in'])/1000, 0)
annotation.end = max(float(layer['time_out'])/1000, 0)
if annotation.end < annotation.start:
2012-01-20 20:19:38 +00:00
annotation.end, annotation.start = annotation.start, annotation.end
2011-12-18 09:49:28 +00:00
username = layer['creator'].strip()
2011-12-05 13:49:34 +00:00
annotation.user = User.objects.get(username=username)
2011-12-25 13:25:40 +00:00
annotation.value = html_parser(layer['value'])
2011-12-05 13:49:34 +00:00
annotation.created = datetime.fromtimestamp(int(layer['created']))
annotation.modified = datetime.fromtimestamp(int(layer['modified']))
annotation.save()
#migration alias
alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
alias.new = annotation.public_id
alias.save()
2011-12-04 13:33:48 +00:00
for oldId in sorted(padma, key=lambda x: padma[x]['created']):
2011-12-05 13:49:34 +00:00
item = get_item({
'title': padma_data[oldId]['title']
})
print '\n', oldId, item.itemId
#if True:
data = padma_data[oldId]
_data = item_data(data)
2011-12-18 09:49:28 +00:00
username = _data.pop('creator').strip()
2011-12-05 13:49:34 +00:00
item.user = User.objects.get(username=username)
for key in _data:
item.data[key] = _data[key]
if 'collection' in data and data['collection']:
group, created = Group.objects.get_or_create(name=data['collection'])
item.groups.add(group)
2011-12-05 13:49:34 +00:00
if 'poster_frame' in item.data:
item.poster_frame = float(item.data.pop('poster_frame')) / 1000
if 'published' in item.data:
item.published = datetime.fromtimestamp(int(item.data.pop('published')))
if 'created' in item.data:
item.created = datetime.fromtimestamp(int(item.data.pop('created')))
if 'modified' in item.data:
item.modified = datetime.fromtimestamp(int(item.data.pop('modified')))
item.level = not data.get('public', False) and 2 or 0
item.save()
item.make_poster(True)
import_layers(item, data['layers'])
#link file
if oldId in padma:
if padma[oldId]['oshash']:
print 'add file', padma[oldId]['oshash']
oshash = padma[oldId]['oshash']
qs = File.objects.filter(oshash=oshash)
if qs.count() == 0:
f = File()
f.oshash = oshash
else:
f = qs[0]
f.item = item
f.path = padma[oldId].get('file', '')
f.save()
if 'ogg_oshash' in padma[oldId]:
print 'add file', padma[oldId]['ogg_oshash']
oshash = padma[oldId]['ogg_oshash']
qs = File.objects.filter(oshash=oshash)
if qs.count() == 0:
f = File()
f.oshash = oshash
else:
f = qs[0]
f.item = item
f.path = padma[oldId].get('ogg', '')
f.save()
2011-12-04 13:33:48 +00:00
alias, created = IDAlias.objects.get_or_create(old=oldId)
2011-12-05 13:49:34 +00:00
alias.new = item.itemId
2011-12-04 13:33:48 +00:00
alias.save()
2011-12-05 13:49:34 +00:00
print item, item.itemId
2011-12-18 09:49:28 +00:00
print "import lists"
2011-12-04 17:05:48 +00:00
for l in lists:
2011-12-18 09:49:28 +00:00
l['user'] = User.objects.get(username=l['user'].strip())
2011-12-05 13:49:34 +00:00
p,c = List.objects.get_or_create(name=l['title'], user=l['user'])
2011-12-04 17:05:48 +00:00
p.type = l['type'] == 'static' and 'static' or 'smart'
p.status = l['public'] and 'featured' or 'private'
p.description = html_parser(l['description'])
2011-12-04 17:05:48 +00:00
p.save()
if l['type'] == 'static':
2011-12-05 13:49:34 +00:00
for v in l['items']:
try:
itemId = IDAlias.objects.get(old=v).new
i = Item.objects.get(itemId=itemId)
p.add(i)
except Item.DoesNotExist:
print p.name, v
2011-12-04 17:05:48 +00:00
else:
key = l['query']['key']
value= l['query']['value']
if key == '': key = '*'
p.query = {'conditions': [{'key': key, 'value': value, 'operator': '='}], 'operator': '&'}
p.save()
alias, created = ListAlias.objects.get_or_create(old=l['id'])
alias.new = p.get_id()
alias.save()
#Places
2011-12-18 09:49:28 +00:00
print "import places"
2011-12-04 17:05:48 +00:00
for l in locations:
oldId = l.pop('id')
2011-12-05 13:49:34 +00:00
if 'user' in l:
2011-12-18 09:49:28 +00:00
l['user'] = User.objects.get(username=l['user'].strip())
2011-12-05 13:49:34 +00:00
else:
l['user'] = User.objects.all().order_by('id')[0]
2013-01-15 10:47:02 +00:00
l['name'] = ox.decode_html(l['name'])
2011-12-04 17:05:48 +00:00
l['created'] = datetime.fromtimestamp(int(l['created']))
l['modified'] = datetime.fromtimestamp(int(l['modified']))
2011-12-05 13:49:34 +00:00
l['alternativeNames'] = tuple(l['alternativeNames'])
l['geoname'] = l['name']
2012-02-16 09:29:00 +00:00
l['type'] = 'feature'
2011-12-05 13:49:34 +00:00
p, c = Place.objects.get_or_create(name=l['name'])
for key in l:
if key != 'annotations':
setattr(p, key, l[key])
2011-12-04 17:05:48 +00:00
p.save()
#FIXME matches
2011-12-25 12:55:02 +00:00
#fixme update links in annotations
2011-12-05 13:49:34 +00:00