padma migration

This commit is contained in:
j 2011-12-04 14:33:48 +01:00
commit 272c228326
2 changed files with 229 additions and 0 deletions

155
import_padma.py Executable file
View file

@ -0,0 +1,155 @@
#!/usr/bin/env python
from __future__ import division
import os
import sys
import_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
root_dir = os.path.normpath(os.path.abspath(sys.argv[1]))
os.chdir(root_dir)
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))
sys.path.insert(0, root_dir)
from django.core.management import setup_environ
try:
import settings # Assumed to be in the same directory.
except ImportError:
import sys
sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
sys.exit(1)
settings.DEBUG=False
setup_environ(settings)
from django.contrib.auth.models import User, Group
from datetime import datetime
from ox.utils import json
import ox
from item.models import Item
from annotation.models import Annotation, Layer
from archive.models import File
from urlalias.models import IDAlias, LayerAlias
from django.db import connection, transaction
# The JSON dumps are read relative to the directory of this script.
os.chdir(import_dir)
with open('users.json') as f:
    users = json.load(f)
with open('padma_files.json') as f:
    padma = json.load(f)
with open('padma_data.json') as f:
    padma_data = json.load(f)

# padma_dump.py writes padma_files.json as a list of per-video records,
# while the code below looks records up by old pad.ma id (padma[oldId]).
# Index a list dump by its 'id' field; a dump that already is a mapping is
# used as it is.
if isinstance(padma, list):
    padma = dict((entry['id'], entry) for entry in padma)
longest_username = max([len(u['username'].strip()) for u in users]) + 1
if longest_username > 255:
print "longer usernames, extending table to", longest_username, 'fix in monkey_patch/models.py'
cursor = connection.cursor()
cursor.execute('ALTER TABLE auth_user ALTER COLUMN username TYPE varchar(%d);'%longest_username)
transaction.commit_unless_managed()
print "now users"
for u in users:
username = u['username'].strip()
user, created = User.objects.get_or_create(username=username)
user.email = u['email']
user.password = u['password']
user.date_joined = datetime.strptime(u['created'], '%Y-%m-%dT%H:%M:%SZ')
user.save()
profile = user.get_profile()
if 'admin' in user['groups']:
profile.set_level('admin')
else:
profile.set_level('member')
profile.save()
for g in u['groups']:
if g and g.strip() and g not 'admin':
group, created = Group.objects.get_or_create(name=g)
user.groups.add(group)
def item_data(data):
    """Return a cleaned copy of one pad.ma video record.

    - string values get '\\r\\n' replaced by '\\n' and are stripped
    - the keys 'id', 'categories' and 'languages' are renamed to 'oldId',
      'category' and 'language'
    - 'director' is turned into a list by splitting on ', '; a missing
      value (None or empty string) gives an empty list rather than
      [u'None'] or [u'']
    - 'layers', 'duration', 'size' and 'public' are dropped

    The input mapping is not modified.
    """
    renamed = {
        u'id': u'oldId',
        u'categories': u'category',
        u'languages': u'language',
    }
    d = {}
    for key, value in data.items():
        if isinstance(value, basestring):
            value = value.replace('\r\n', '\n').strip()
        d[renamed.get(key, key)] = value

    if 'director' in d:
        director = d['director']
        if director:
            names = unicode(director).strip().split(', ')
            # skip empty fragments such as the one left by a trailing ', '
            d['director'] = [name for name in names if name]
        else:
            # unicode(None) would otherwise produce a director named "None"
            d['director'] = []

    for key in ('layers', 'duration', 'size', 'public'):
        d.pop(key, None)
    return d
def import_layers(item, layers):
Annotation.objects.filter(item=item).delete()
print "importing %d annotations" % len(layers)
for layer in layers:
oldLayerId = layer['id']
annotation = Annotation(item=item, layer=layer['track'])
annotation.start = float(layer['time_in'])/1000
annotation.end = float(layer['time_out'])/1000
username = layer['creator'].strip()
annotation.user = User.objects.get(username=username)
annotation.value = layer['value']
annotation.created = datetime.fromtimestamp(int(layer['created']))
annotation.modified = datetime.fromtimestamp(int(layer['modified']))
annotation.save()
#migration alias
alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
alias.new = annotation.public_id
alias.save()
i=1
for oldId in sorted(padma, key=lambda x: padma[x]['created']):
itemId = ox.to26(i)
print '\n', itemId, oldId
qs = Item.objects.filter(itemId=itemId)
if qs.count() == 0:
item = Item(itemId=itemId)
else:
item = qs[0]
alias, created = IDAlias.objects.get_or_create(old=oldId)
alias.new = itemId
alias.save()
if True or not item.data:
data = padma_data[oldId]
_data = item_data(data)
for key in _data:
item.data[key] = _data[key]
if 'poster_frame' in data:
item.poster_frame = float(item.data.pop('poster_frame')) / 1000
if 'published' in data:
item.published = datetime.fromtimestamp(int(item.data.pop('published')))
if 'created' in data:
item.created = datetime.fromtimestamp(int(item.data.pop('created')))
if 'modified' in data:
item.modified = datetime.fromtimestamp(int(item.data.pop('modified')))
item.level = data.get('public', False) and 0 or 2
username = item.data.pop('creator').strip()
item.user = User.objects.get(username=username)
item.save()
import_layers(item, data['layers'])
#link file
if oldId in padma:
if padma[oldId]['oshash']:
print 'add file', padma[oldId]['oshash']
f, created = File.objects.get_or_create(oshash=padma[oldId]['oshash'],
item=item)
f.path = f.get('file', '')
f.save()
if 'ogg_oshash' in padma[oldId]:
print 'add file', padma[oldId]['ogg_oshash']
f, created = File.objects.get_or_create(oshash=padma[oldId]['ogg_oshash'],
item=item)
f.path = f.get('ogg', '')
f.save()
i += 1
print item, item.available

74
padma_dump.py Executable file
View file

@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
# GPL written 2008 by j@pad.ma
import pkg_resources
pkg_resources.require("TurboGears")
from turbogears import config, update_config, start_server
import cherrypy
cherrypy.lowercase_api = True
from os.path import *
import sys
# Choose the config file: a path given on the command line wins. Without
# one, a setup.py next to this script selects dev.cfg; if there is none the
# script is probably installed and prod.cfg is used.
if len(sys.argv) > 1:
    configfile = sys.argv[1]
elif exists(join(dirname(__file__), "setup.py")):
    configfile = "dev.cfg"
else:
    configfile = "prod.cfg"
update_config(configfile=configfile, modulename="padma.config")
config.update(dict(package="padma"))
from padma.model import *
import os
import ox
import simplejson as json
# All dump files are written below this directory.
prefix = '/tmp/padma'
# os.makedirs raises OSError if the directory already exists, which made a
# second run of this script fail; only create it when it is missing.
if not os.path.isdir(prefix):
    os.makedirs(prefix)

# padma_data.json: the jsondump() of every video, keyed by its hid.
data = {}
for v in Video.select():
    data[v.hid] = v.jsondump()
with open(os.path.join(prefix, 'padma_data.json'), 'w') as f:
    json.dump(data, f)
# users.json: one record per user, ordered by id. The stored password is
# prefixed with 'sha1$$'.
users = [
    {
        'id': account.id,
        'username': account.user_name.strip(),
        'email': account.email_address,
        'password': 'sha1$$' + account.password,
        'created': account.created.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'groups': [g.name for g in account.groups],
    }
    for account in User.select().orderBy('id')
]
users_path = os.path.join(prefix, 'users.json')
with open(users_path, 'w') as out:
    json.dump(users, out, indent=2)
import time

# padma_files.json: one record per video, ordered by id.
files = []
for v in Video.select().orderBy('id'):
    info = ox.avinfo(v.filename)
    files.append({
        'sha1sum': v.source_hash,
        'ogg': v.filename,
        'id': v.hid,
        # strftime('%s') is a platform extension that is not available
        # everywhere; time.mktime converts the same local-time tuple to
        # seconds since the epoch portably.
        'created': int(time.mktime(v.created.timetuple())),
        # 'oshash' is taken from the SOURCE_OSHASH metadata entry ('' when
        # absent); 'ogg_oshash' is the oshash ox.avinfo reports for the file.
        'oshash': info.get('metadata', {}).get('SOURCE_OSHASH', ''),
        'ogg_oshash': info['oshash'],
    })
with open(os.path.join(prefix, 'padma_files.json'), 'w') as out:
    json.dump(files, out, indent=2)