From 272c228326b6ef11d743e8b3d10404c91aa12ce7 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sun, 4 Dec 2011 14:33:48 +0100
Subject: [PATCH] padma migration

---
 import_padma.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++
 padma_dump.py   |  84 +++++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100755 import_padma.py
 create mode 100755 padma_dump.py

diff --git a/import_padma.py b/import_padma.py
new file mode 100755
index 0000000..bd459a3
--- /dev/null
+++ b/import_padma.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+"""Import a pad.ma dump (users, items, annotations, files) into this project.
+
+Usage: import_padma.py <project_dir>
+
+<project_dir> must contain settings.py, and the virtualenv activation script
+is expected at <project_dir>/../bin/activate_this.py. The input files
+users.json, padma_files.json and padma_data.json (written by padma_dump.py)
+are read from the directory that contains this script.
+"""
+from __future__ import division
+import os
+import sys
+
+if len(sys.argv) < 2:
+    sys.exit('usage: %s <project_dir>' % sys.argv[0])
+
+import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
+os.chdir(root_dir)
+
+#using virtualenv's activate_this.py to reorder sys.path
+activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
+execfile(activate_this, dict(__file__=activate_this))
+
+sys.path.insert(0, root_dir)
+
+from django.core.management import setup_environ
+try:
+    import settings # Assumed to be in the same directory.
+except ImportError:
+    import sys
+    sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
+    sys.exit(1)
+settings.DEBUG=False
+
+setup_environ(settings)
+
+from django.contrib.auth.models import User, Group
+from datetime import datetime
+from ox.utils import json
+import ox
+from item.models import Item
+from annotation.models import Annotation, Layer
+from archive.models import File
+from urlalias.models import IDAlias, LayerAlias
+from django.db import connection, transaction
+
+
+# the JSON dumps live next to this script, not in the project directory
+os.chdir(import_dir)
+
+with open('users.json') as f: users = json.load(f)
+
+with open('padma_files.json') as f: padma = json.load(f)
+# padma_dump.py writes padma_files.json as a list of per-video dicts, while
+# the code below looks entries up by their old id, so index the list by 'id'.
+if isinstance(padma, list):
+    padma = dict((entry['id'], entry) for entry in padma)
+
+with open('padma_data.json') as f: padma_data = json.load(f)
+
+longest_username = max([len(u['username'].strip()) for u in users]) + 1
+if longest_username > 255:
+    print "longer usernames, extending table to", longest_username, 'fix in monkey_patch/models.py'
+    cursor = connection.cursor()
+    cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);'%longest_username)
+    transaction.commit_unless_managed()
+
+print "now users"
+for u in users:
+    username = u['username'].strip()
+    user, created = User.objects.get_or_create(username=username)
+    user.email = u['email']
+    # the dump already holds a hash string ('sha1$$<hash>'), stored as-is
+    user.password = u['password']
+    user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ')
+    user.save()
+    profile = user.get_profile()
+    # group names come from the dumped record `u`, not the Django `user`
+    if 'admin' in u['groups']:
+        profile.set_level('admin')
+    else:
+        profile.set_level('member')
+    profile.save()
+    for g in u['groups']:
+        # 'admin' is mapped to the profile level above, not to a Group
+        if g and g.strip() and g != 'admin':
+            group, created = Group.objects.get_or_create(name=g)
+            user.groups.add(group)
+
+def item_data(data):
+    """Return a copy of a dumped pad.ma video dict prepared for Item.data.
+
+    String values have CRLF converted to LF and are stripped, the keys
+    'id', 'categories' and 'languages' are renamed to 'oldId', 'category'
+    and 'language', 'director' is split on ', ' into a list, and the keys
+    'layers', 'duration', 'size' and 'public' are dropped.
+    """
+    renamed = {
+        u'id': u'oldId',
+        u'categories': u'category',
+        u'languages': u'language',
+    }
+    d = {}
+    for key in data:
+        value = data[key]
+        if isinstance(value, basestring):
+            value = value.replace('\r\n', '\n').strip()
+        d[renamed.get(key, key)] = value
+    if 'director' in d:
+        d['director'] = unicode(d['director']).strip().split(', ')
+    for key in ('layers', 'duration', 'size', 'public'):
+        if key in d:
+            del d[key]
+    return d
+
+def import_layers(item, layers):
+    """Replace all annotations of `item` with the dumped pad.ma `layers`.
+
+    Each layer dict supplies 'id', 'track', 'time_in', 'time_out',
+    'creator', 'value', 'created' and 'modified'. Times are divided by 1000
+    before being stored, and the old layer id is recorded in LayerAlias.
+    """
+    Annotation.objects.filter(item=item).delete()
+    print "importing %d annotations" % len(layers)
+    for layer in layers:
+        oldLayerId = layer['id']
+        annotation = Annotation(item=item, layer=layer['track'])
+        annotation.start = float(layer['time_in'])/1000
+        annotation.end = float(layer['time_out'])/1000
+        username = layer['creator'].strip()
+        annotation.user = User.objects.get(username=username)
+        annotation.value = layer['value']
+        annotation.created = datetime.fromtimestamp(int(layer['created']))
+        annotation.modified = datetime.fromtimestamp(int(layer['modified']))
+        annotation.save()
+        #migration alias
+        alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
+        alias.new = annotation.public_id
+        alias.save()
+
+# new item ids are assigned in order of the old videos' creation time
+i=1
+for oldId in sorted(padma, key=lambda x: padma[x]['created']):
+    itemId = ox.to26(i)
+    print '\n', itemId, oldId
+    qs = Item.objects.filter(itemId=itemId)
+    if qs.count() == 0:
+        item = Item(itemId=itemId)
+    else:
+        item = qs[0]
+    alias, created = IDAlias.objects.get_or_create(old=oldId)
+    alias.new = itemId
+    alias.save()
+    # `True or` forces a re-import even when the item already has data
+    if True or not item.data:
+        data = padma_data[oldId]
+        _data = item_data(data)
+        for key in _data:
+            item.data[key] = _data[key]
+        if 'poster_frame' in data:
+            item.poster_frame = float(item.data.pop('poster_frame')) / 1000
+        if 'published' in data:
+            item.published = datetime.fromtimestamp(int(item.data.pop('published')))
+        if 'created' in data:
+            item.created = datetime.fromtimestamp(int(item.data.pop('created')))
+        if 'modified' in data:
+            item.modified = datetime.fromtimestamp(int(item.data.pop('modified')))
+        # `public and 0 or 2` always gave 2 because 0 is falsy
+        item.level = 0 if data.get('public', False) else 2
+        username = item.data.pop('creator').strip()
+        item.user = User.objects.get(username=username)
+        item.save()
+        import_layers(item, data['layers'])
+    #link file
+    if oldId in padma:
+        if padma[oldId]['oshash']:
+            print 'add file', padma[oldId]['oshash']
+            f, created = File.objects.get_or_create(oshash=padma[oldId]['oshash'],
+                item=item)
+            # the path comes from the dumped record, not from the File object
+            f.path = padma[oldId].get('file', '')
+            f.save()
+        if 'ogg_oshash' in padma[oldId]:
+            print 'add file', padma[oldId]['ogg_oshash']
+            f, created = File.objects.get_or_create(oshash=padma[oldId]['ogg_oshash'],
+                item=item)
+            f.path = padma[oldId].get('ogg', '')
+            f.save()
+    i += 1
+    print item, item.available
diff --git a/padma_dump.py b/padma_dump.py
new file mode 100755
index 0000000..d9d3e44
--- /dev/null
+++ b/padma_dump.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=2:sts=2:ts=2
+# GPL written 2008 by j@pad.ma
+"""Dump pad.ma videos, users and file hashes as JSON into /tmp/padma.
+
+Writes padma_data.json, users.json and padma_files.json, the input files
+read by import_padma.py.
+"""
+
+import pkg_resources
+pkg_resources.require("TurboGears")
+
+from turbogears import config, update_config, start_server
+import cherrypy
+cherrypy.lowercase_api = True
+from os.path import *
+import sys
+
+# first look on the command line for a desired config file,
+# if it's not on the command line, then
+# look for setup.py in this directory. If it's not there, this script is
+# probably installed
+if len(sys.argv) > 1:
+    update_config(configfile=sys.argv[1],
+        modulename="padma.config")
+elif exists(join(dirname(__file__), "setup.py")):
+    update_config(configfile="dev.cfg",modulename="padma.config")
+else:
+    update_config(configfile="prod.cfg",modulename="padma.config")
+config.update(dict(package="padma"))
+
+from padma.model import *
+
+import os
+import time
+import ox
+import simplejson as json
+
+prefix = '/tmp/padma'
+# tolerate an existing output directory so the dump can be re-run
+if not os.path.isdir(prefix):
+    os.makedirs(prefix)
+
+data = {}
+for v in Video.select():
+    data[v.hid] = v.jsondump()
+
+with open(os.path.join(prefix, 'padma_data.json'), 'w') as f:
+    json.dump(data, f)
+
+users = []
+for u in User.select().orderBy('id'):
+    users.append({
+        'id': u.id,
+        'username': u.user_name.strip(),
+        'email': u.email_address,
+        'password': 'sha1$$' + u.password,
+        'created': u.created.strftime('%Y-%m-%dT%H:%M:%SZ'),
+        'groups': [g.name for g in u.groups],
+    })
+
+with open(os.path.join(prefix, 'users.json'), 'w') as f:
+    json.dump(users, f, indent=2)
+
+
+files = []
+for v in Video.select().orderBy('id'):
+    f = {
+        'sha1sum': v.source_hash,
+        'ogg': v.filename,
+        'id': v.hid,
+        # strftime('%s') is a non-portable libc extension; mktime gives the
+        # same local-time epoch seconds
+        'created': int(time.mktime(v.created.timetuple()))
+    }
+    info = ox.avinfo(v.filename)
+    f['oshash'] = info.get('metadata', {}).get('SOURCE_OSHASH', '')
+    f['ogg_oshash'] = info['oshash']
+    files.append(f)
+
+with open(os.path.join(prefix, 'padma_files.json'), 'w') as f:
+    json.dump(files, f, indent=2)
+