#!/usr/bin/env python from __future__ import division import os import sys import hashlib import re import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) root_dir = os.path.normpath(os.path.abspath(sys.argv[1])) os.chdir(root_dir) #using virtualenv's activate_this.py to reorder sys.path activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py') execfile(activate_this, dict(__file__=activate_this)) sys.path.insert(0, root_dir) from django.core.management import setup_environ try: import settings # Assumed to be in the same directory. except ImportError: import sys sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__) sys.exit(1) settings.DEBUG=False setup_environ(settings) from django.contrib.auth.models import User, Group from datetime import datetime from ox.utils import json import ox import monkey_patch.models from item.models import Item, get_item from annotation.models import Annotation from archive.models import File from urlalias.models import IDAlias, LayerAlias, ListAlias from place.models import Place from itemlist.models import List from django.db import connection, transaction from user.models import SessionData def html_parser(text): text = text.strip() text = text.replace('', '__i__').replace('', '__/i__') text = text.replace('', '__b__').replace('', '__/b__') #truns links into wiki links, make sure to only take http links text = re.sub('(.*?)', '[\\1 \\2]', text) text = ox.escape(text) text = text.replace('__i__', '').replace('__/i__', '') text = text.replace('__b__', '').replace('__/b__', '') links = re.compile('(\[(http.*?) (.*?)\])').findall(text) for t, link, txt in links: link = link.replace('http', '__LINK__').replace('.', '__DOT__') ll = '%s' % (link, txt) text = text.replace(t, ll) links = re.compile('(\[(http.*?)\])').findall(text) for t, link in links: link = link.replace('http', '__LINK__').replace('.', '__DOT__') ll = '%s' % (link, link) text = text.replace(t, ll) text = ox.urlize(text, nofollow=False) #inpage links text = re.sub('\[(/.+?) (.+?)\]', '\\2', text) text = text.replace('__LINK__', 'http').replace('__DOT__', '.') text = text.replace('\n\n', '
\n').replace("\n", '
\n') return text os.chdir(import_dir) with open('padma/users.json') as f: users = json.load(f) with open('padma/files.json') as f: padma = json.load(f) with open('padma/locations.json') as f: locations = json.load(f) with open('padma/lists.json') as f: lists = json.load(f) with open('padma/data.json') as f: padma_data = json.load(f) longest_username = max([len(u['username'].strip()) for u in users]) + 1 if longest_username > 255: print "longer usernames, extending table to", longest_username, 'fix in monkey_patch/models.py' cursor = connection.cursor() cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);'%longest_username) transaction.commit_unless_managed() print "import users" for u in users: username = u['username'].strip() user, created = User.objects.get_or_create(username=username) user.email = u['email'] if not '@' in user.email: user.email = '' user.password = u['password'] user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ') user.save() profile = user.get_profile() if not user.email: profile.newsletter = False if 'admin' in u['groups']: profile.set_level('admin') else: profile.set_level('member') profile.save() if SessionData.objects.filter(user=user).count() == 0: s = SessionData() s.user = user s.session_key = hashlib.sha1(user.username).hexdigest() s.lastseen = user.date_joined s.firstseen = user.date_joined s.timesseen = 1 s.save() for g in u['groups']: if g and g.strip() and g != 'admin': group, created = Group.objects.get_or_create(name=g) user.groups.add(group) def item_data(data): d = {} for key in data: if key in data: value = data[key] if isinstance(value, basestring): value = value.replace('\r\n', '\n').strip() d[{ u'id': u'oldId', u'categories': u'topic', u'source': u'project', u'collection': u'source', u'languages': u'language', }.get(key, key)] = value if 'director' in d: d['director'] = unicode(d['director']).replace(' and ', ', ').strip().split(', ') d['director'] = filter(lambda x: x.strip().lower() not in ('none', 'n/a', '', 'various'), d['director']) for key in ('layers', 'duration', 'size', 'public'): if key in d: del d[key] d['license'] = ['Pad.ma General Public License'] return d def import_layers(item, layers): Annotation.objects.filter(item=item).delete() print "importing %d annotations" % len(layers) with transaction.commit_on_success(): for layer in layers: oldLayerId = layer['id'] layer_name = '%ss'%layer['track'] annotation = Annotation(item=item, layer=layer_name) annotation.start = float(layer['time_in'])/1000 annotation.end = float(layer['time_out'])/1000 if annotation.end < annotation.start: annotation.end, annotation.start = annotation.start, annotation.end username = layer['creator'].strip() annotation.user = User.objects.get(username=username) annotation.value = html_parser(layer['value']) annotation.created = datetime.fromtimestamp(int(layer['created'])) annotation.modified = datetime.fromtimestamp(int(layer['modified'])) annotation.save() #migration alias alias, created = LayerAlias.objects.get_or_create(old=oldLayerId) alias.new = annotation.public_id alias.save() for oldId in sorted(padma, key=lambda x: padma[x]['created']): item = get_item({ 'title': padma_data[oldId]['title'] }) print '\n', oldId, item.itemId #if True: data = padma_data[oldId] _data = item_data(data) username = _data.pop('creator').strip() item.user = User.objects.get(username=username) for key in _data: item.data[key] = _data[key] if 'collection' in data and data['collection']: group, created = Group.objects.get_or_create(name=data['collection']) item.groups.add(group) if 'poster_frame' in item.data: item.poster_frame = float(item.data.pop('poster_frame')) / 1000 if 'published' in item.data: item.published = datetime.fromtimestamp(int(item.data.pop('published'))) if 'created' in item.data: item.created = datetime.fromtimestamp(int(item.data.pop('created'))) if 'modified' in item.data: item.modified = datetime.fromtimestamp(int(item.data.pop('modified'))) item.level = not data.get('public', False) and 2 or 0 item.save() item.make_poster(True) import_layers(item, data['layers']) #link file if oldId in padma: if padma[oldId]['oshash']: print 'add file', padma[oldId]['oshash'] oshash = padma[oldId]['oshash'] qs = File.objects.filter(oshash=oshash) if qs.count() == 0: f = File() f.oshash = oshash else: f = qs[0] f.item = item f.path = padma[oldId].get('file', '') f.save() if 'ogg_oshash' in padma[oldId]: print 'add file', padma[oldId]['ogg_oshash'] oshash = padma[oldId]['ogg_oshash'] qs = File.objects.filter(oshash=oshash) if qs.count() == 0: f = File() f.oshash = oshash else: f = qs[0] f.item = item f.path = padma[oldId].get('ogg', '') f.save() alias, created = IDAlias.objects.get_or_create(old=oldId) alias.new = item.itemId alias.save() print item, item.itemId print "import lists" for l in lists: l['user'] = User.objects.get(username=l['user'].strip()) p,c = List.objects.get_or_create(name=l['title'], user=l['user']) p.type = l['type'] == 'static' and 'static' or 'smart' p.status = l['public'] and 'featured' or 'private' p.description = l['description'] p.save() if l['type'] == 'static': for v in l['items']: try: itemId = IDAlias.objects.get(old=v).new i = Item.objects.get(itemId=itemId) p.add(i) except Item.DoesNotExist: print p.name, v else: key = l['query']['key'] value= l['query']['value'] if key == '': key = '*' p.query = {'conditions': [{'key': key, 'value': value, 'operator': '='}], 'operator': '&'} p.save() alias, created = ListAlias.objects.get_or_create(old=l['id']) alias.new = p.get_id() alias.save() #Places print "import places" for l in locations: oldId = l.pop('id') if 'user' in l: l['user'] = User.objects.get(username=l['user'].strip()) else: l['user'] = User.objects.all().order_by('id')[0] l['name'] = ox.decodeHtml(l['name']) l['created'] = datetime.fromtimestamp(int(l['created'])) l['modified'] = datetime.fromtimestamp(int(l['modified'])) l['alternativeNames'] = tuple(l['alternativeNames']) l['geoname'] = l['name'] l['type'] = 'city' p, c = Place.objects.get_or_create(name=l['name']) for key in l: if key != 'annotations': setattr(p, key, l[key]) p.save() #FIXME matches #fixme update links in annotations