2011-12-04 13:33:48 +00:00
#!/usr/bin/env python
from __future__ import division
import os
import sys
2011-12-05 13:49:34 +00:00
import hashlib
2011-12-25 12:55:02 +00:00
import re
2011-12-04 13:33:48 +00:00
# Bootstrap: resolve the working directories, activate the virtualenv and
# configure Django before any model module is imported further down.
# Directory containing this script; the process chdir()s back here later to
# read the padma/*.json dumps.
import_dir = os . path . normpath ( os . path . abspath ( os . path . dirname ( __file__ ) ) )
# Project directory, taken from the first command-line argument.
root_dir = os . path . normpath ( os . path . abspath ( sys . argv [ 1 ] ) )
os . chdir ( root_dir )
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os . path . join ( root_dir , ' .. ' , ' bin ' , ' activate_this.py ' )
execfile ( activate_this , dict ( __file__ = activate_this ) )
# Search the project directory first so that `import settings` below resolves there.
sys . path . insert ( 0 , root_dir )
from django . core . management import setup_environ
try :
import settings # Assumed to be in the same directory.
except ImportError :
import sys
sys . stderr . write ( " Error: Can ' t find the file ' settings.py ' in the directory containing %r . It appears you ' ve customized things. \n You ' ll have to run django-admin.py, passing it your settings module. \n (If the file settings.py does indeed exist, it ' s causing an ImportError somehow.) \n " % __file__ )
sys . exit ( 1 )
# DEBUG is forced off before Django is configured.
# NOTE(review): presumably to avoid per-query debug bookkeeping during the bulk import - confirm.
settings . DEBUG = False
setup_environ ( settings )
from django . contrib . auth . models import User , Group
from datetime import datetime
from ox . utils import json
import ox
2011-12-05 13:49:34 +00:00
import monkey_patch . models
from item . models import Item , get_item
2011-12-04 17:05:48 +00:00
from annotation . models import Annotation
2011-12-25 12:55:02 +00:00
2011-12-04 13:33:48 +00:00
from archive . models import File
2011-12-04 17:05:48 +00:00
from urlalias . models import IDAlias , LayerAlias , ListAlias
from place . models import Place
from itemlist . models import List
2011-12-04 13:33:48 +00:00
from django . db import connection , transaction
2011-12-05 13:49:34 +00:00
from user . models import SessionData
2011-12-04 13:33:48 +00:00
2011-12-25 12:55:02 +00:00
def html_parser(text):
    """Convert legacy annotation HTML into the restricted markup stored here.

    Only <i>, <b> and links survive; everything else is passed through
    ox.escape(). Links are temporarily rewritten to a wiki-style
    ``[url label]`` form so they survive escaping, then turned back into
    <a> tags. Newlines are converted to <br>.

    text -- annotation value as a (unicode) string.
    Returns the converted string.
    """
    def mask(url):
        # Hide the URL behind placeholders so the ox.urlize() call below
        # does not see (and link) it a second time; undone at the end.
        return url.replace('http', '__LINK__').replace('.', '__DOT__')

    text = text.strip()
    # Protect the two allowed inline tags from escaping.
    text = text.replace('<i>', '__i__').replace('</i>', '__/i__')
    text = text.replace('<b>', '__b__').replace('</b>', '__/b__')
    # turns links into wiki links, make sure to only take http links
    text = re.sub(r'<a .*?href="(http.*?)".*?>(.*?)</a>', r'[\1 \2]', text)
    text = ox.escape(text)
    text = text.replace('__i__', '<i>').replace('__/i__', '</i>')
    text = text.replace('__b__', '<b>').replace('__/b__', '</b>')
    # [http://url label] -> <a href="url">label</a>
    for wiki, link, label in re.findall(r'(\[(http.*?) (.*?)\])', text):
        text = text.replace(wiki, '<a href="%s">%s</a>' % (mask(link), label))
    # [http://url] -> <a href="url">url</a>
    for wiki, link in re.findall(r'(\[(http.*?)\])', text):
        masked = mask(link)
        text = text.replace(wiki, '<a href="%s">%s</a>' % (masked, masked))
    # Remaining bare URLs are handed to ox.urlize().
    text = ox.urlize(text, nofollow=False)
    #inpage links
    text = re.sub(r'\[(/.+?) (.+?)\]', r'<a href="\1">\2</a>', text)
    text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
    # Order matters: a blank line ends up as '<br><br>\n', a single
    # newline as '<br>\n'.
    text = text.replace('\n\n', '<br>\n').replace("\n", '<br>\n')
    return text
2011-12-04 13:33:48 +00:00
os . chdir ( import_dir )
2011-12-05 13:49:34 +00:00
with open ( ' padma/users.json ' ) as f : users = json . load ( f )
2011-12-04 13:33:48 +00:00
2011-12-05 13:49:34 +00:00
with open ( ' padma/files.json ' ) as f : padma = json . load ( f )
2011-12-04 13:33:48 +00:00
2011-12-05 13:49:34 +00:00
with open ( ' padma/locations.json ' ) as f : locations = json . load ( f )
2011-12-04 17:05:48 +00:00
2011-12-05 13:49:34 +00:00
with open ( ' padma/lists.json ' ) as f : lists = json . load ( f )
2011-12-04 17:05:48 +00:00
2011-12-05 13:49:34 +00:00
with open ( ' padma/data.json ' ) as f : padma_data = json . load ( f )
2011-12-04 13:33:48 +00:00
longest_username = max ( [ len ( u [ ' username ' ] . strip ( ) ) for u in users ] ) + 1
if longest_username > 255 :
print " longer usernames, extending table to " , longest_username , ' fix in monkey_patch/models.py '
cursor = connection . cursor ( )
cursor . execute ( ' ALTER TABLE auth_user ALTER COLUMN username TYPE varchar( %d ); ' % longest_username )
transaction . commit_unless_managed ( )
2011-12-05 13:49:34 +00:00
print " import users "
2011-12-04 13:33:48 +00:00
for u in users :
2011-12-18 09:49:28 +00:00
username = u [ ' username ' ] . strip ( )
2011-12-04 13:33:48 +00:00
user , created = User . objects . get_or_create ( username = username )
user . email = u [ ' email ' ]
2011-12-11 22:01:02 +00:00
if not ' @ ' in user . email :
user . email = ' '
2011-12-04 13:33:48 +00:00
user . password = u [ ' password ' ]
user . date_joined = datetime . strptime ( u [ ' created ' ] , ' % Y- % m- %d T % H: % M: % SZ ' )
user . save ( )
profile = user . get_profile ( )
2011-12-18 09:49:28 +00:00
if not user . email :
profile . newsletter = False
2011-12-05 13:49:34 +00:00
if ' admin ' in u [ ' groups ' ] :
2011-12-04 13:33:48 +00:00
profile . set_level ( ' admin ' )
else :
profile . set_level ( ' member ' )
profile . save ( )
2011-12-05 13:49:34 +00:00
if SessionData . objects . filter ( user = user ) . count ( ) == 0 :
s = SessionData ( )
s . user = user
s . session_key = hashlib . sha1 ( user . username ) . hexdigest ( )
2011-12-11 22:01:02 +00:00
s . lastseen = user . date_joined
s . firstseen = user . date_joined
2011-12-05 13:49:34 +00:00
s . timesseen = 1
s . save ( )
2011-12-04 13:33:48 +00:00
for g in u [ ' groups ' ] :
2011-12-04 17:05:48 +00:00
if g and g . strip ( ) and g != ' admin ' :
2011-12-04 13:33:48 +00:00
group , created = Group . objects . get_or_create ( name = g )
user . groups . add ( group )
def item_data(data):
    """Translate one legacy item record into the new item data layout.

    - string values get '\\r\\n' normalised to '\\n' and are stripped
    - a few keys are renamed (id -> oldId, categories -> topic,
      source -> project, collection -> source, languages -> language)
    - 'director' becomes a list of names; ' and ' and ',' both separate
      names, placeholders (none, n/a, various, empty) are dropped
    - 'layers', 'duration', 'size' and 'public' are removed
    - 'license' is always set to the Pad.ma General Public License

    data -- dict of the legacy record; it is not modified.
    Returns a new dict.
    """
    renamed_keys = {
        u'id': u'oldId',
        u'categories': u'topic',
        u'source': u'project',
        u'collection': u'source',
        u'languages': u'language',
    }
    # Portable spelling of unicode / basestring (identical on Python 2).
    text_type = type(u'')
    string_types = (str, text_type)

    d = {}
    for key in data:
        value = data[key]
        if isinstance(value, string_types):
            value = value.replace('\r\n', '\n').strip()
        d[renamed_keys.get(key, key)] = value

    if 'director' in d:
        names = text_type(d['director']).replace(' and ', ',').split(',')
        # Strip every name so "A, B" and "A,B" give the same result, and
        # drop placeholder entries.
        d['director'] = [
            name.strip() for name in names
            if name.strip().lower() not in ('none', 'n/a', '', 'various')
        ]

    for key in ('layers', 'duration', 'size', 'public'):
        if key in d:
            del d[key]
    d['license'] = ['Pad.ma General Public License']
    return d
def import_layers(item, layers):
    """Replace all annotations of item with the given legacy layers.

    item   -- the Item the annotations belong to.
    layers -- list of legacy layer dicts; the keys read are 'id', 'track',
              'time_in', 'time_out' (divided by 1000 before storing),
              'creator', 'value', 'created' and 'modified' (passed to
              datetime.fromtimestamp).

    For every layer a LayerAlias old id -> new public_id is recorded.
    Raises User.DoesNotExist if a layer's creator is unknown.
    """
    print("importing %d annotations" % len(layers))
    users = {}  # username -> User, so each creator is looked up only once
    with transaction.commit_on_success():
        # Deleting inside the transaction keeps the old annotations when
        # the import of the new ones fails half-way.
        Annotation.objects.filter(item=item).delete()
        for layer in layers:
            oldLayerId = layer['id']
            annotation = Annotation(item=item, layer='%ss' % layer['track'])
            start = float(layer['time_in']) / 1000
            end = float(layer['time_out']) / 1000
            if end < start:
                # Some legacy layers have in/out swapped.
                start, end = end, start
            annotation.start = start
            annotation.end = end
            username = layer['creator'].strip()
            if username not in users:
                users[username] = User.objects.get(username=username)
            annotation.user = users[username]
            annotation.value = html_parser(layer['value'])
            annotation.created = datetime.fromtimestamp(int(layer['created']))
            annotation.modified = datetime.fromtimestamp(int(layer['modified']))
            annotation.save()
            #migration alias
            alias, created = LayerAlias.objects.get_or_create(old=oldLayerId)
            alias.new = annotation.public_id
            alias.save()
2011-12-04 13:33:48 +00:00
for oldId in sorted ( padma , key = lambda x : padma [ x ] [ ' created ' ] ) :
2011-12-05 13:49:34 +00:00
item = get_item ( {
' title ' : padma_data [ oldId ] [ ' title ' ]
} )
print ' \n ' , oldId , item . itemId
#if True:
data = padma_data [ oldId ]
_data = item_data ( data )
2011-12-18 09:49:28 +00:00
username = _data . pop ( ' creator ' ) . strip ( )
2011-12-05 13:49:34 +00:00
item . user = User . objects . get ( username = username )
for key in _data :
item . data [ key ] = _data [ key ]
2012-01-20 19:38:21 +00:00
if ' collection ' in data and data [ ' collection ' ] :
group , created = Group . objects . get_or_create ( name = data [ ' collection ' ] )
item . groups . add ( group )
2011-12-05 13:49:34 +00:00
if ' poster_frame ' in item . data :
item . poster_frame = float ( item . data . pop ( ' poster_frame ' ) ) / 1000
if ' published ' in item . data :
item . published = datetime . fromtimestamp ( int ( item . data . pop ( ' published ' ) ) )
if ' created ' in item . data :
item . created = datetime . fromtimestamp ( int ( item . data . pop ( ' created ' ) ) )
if ' modified ' in item . data :
item . modified = datetime . fromtimestamp ( int ( item . data . pop ( ' modified ' ) ) )
item . level = not data . get ( ' public ' , False ) and 2 or 0
item . save ( )
item . make_poster ( True )
import_layers ( item , data [ ' layers ' ] )
#link file
if oldId in padma :
if padma [ oldId ] [ ' oshash ' ] :
print ' add file ' , padma [ oldId ] [ ' oshash ' ]
oshash = padma [ oldId ] [ ' oshash ' ]
qs = File . objects . filter ( oshash = oshash )
if qs . count ( ) == 0 :
f = File ( )
f . oshash = oshash
else :
f = qs [ 0 ]
f . item = item
f . path = padma [ oldId ] . get ( ' file ' , ' ' )
f . save ( )
if ' ogg_oshash ' in padma [ oldId ] :
print ' add file ' , padma [ oldId ] [ ' ogg_oshash ' ]
oshash = padma [ oldId ] [ ' ogg_oshash ' ]
qs = File . objects . filter ( oshash = oshash )
if qs . count ( ) == 0 :
f = File ( )
f . oshash = oshash
else :
f = qs [ 0 ]
f . item = item
f . path = padma [ oldId ] . get ( ' ogg ' , ' ' )
f . save ( )
2011-12-04 13:33:48 +00:00
alias , created = IDAlias . objects . get_or_create ( old = oldId )
2011-12-05 13:49:34 +00:00
alias . new = item . itemId
2011-12-04 13:33:48 +00:00
alias . save ( )
2011-12-05 13:49:34 +00:00
print item , item . itemId
2011-12-18 09:49:28 +00:00
print " import lists "
2011-12-04 17:05:48 +00:00
for l in lists :
2011-12-18 09:49:28 +00:00
l [ ' user ' ] = User . objects . get ( username = l [ ' user ' ] . strip ( ) )
2011-12-05 13:49:34 +00:00
p , c = List . objects . get_or_create ( name = l [ ' title ' ] , user = l [ ' user ' ] )
2011-12-04 17:05:48 +00:00
p . type = l [ ' type ' ] == ' static ' and ' static ' or ' smart '
p . status = l [ ' public ' ] and ' featured ' or ' private '
p . description = l [ ' description ' ]
p . save ( )
if l [ ' type ' ] == ' static ' :
2011-12-05 13:49:34 +00:00
for v in l [ ' items ' ] :
try :
itemId = IDAlias . objects . get ( old = v ) . new
i = Item . objects . get ( itemId = itemId )
p . add ( i )
except Item . DoesNotExist :
print p . name , v
2011-12-04 17:05:48 +00:00
else :
key = l [ ' query ' ] [ ' key ' ]
value = l [ ' query ' ] [ ' value ' ]
if key == ' ' : key = ' * '
p . query = { ' conditions ' : [ { ' key ' : key , ' value ' : value , ' operator ' : ' = ' } ] , ' operator ' : ' & ' }
p . save ( )
alias , created = ListAlias . objects . get_or_create ( old = l [ ' id ' ] )
alias . new = p . get_id ( )
alias . save ( )
#Places
2011-12-18 09:49:28 +00:00
print " import places "
2011-12-04 17:05:48 +00:00
for l in locations :
oldId = l . pop ( ' id ' )
2011-12-05 13:49:34 +00:00
if ' user ' in l :
2011-12-18 09:49:28 +00:00
l [ ' user ' ] = User . objects . get ( username = l [ ' user ' ] . strip ( ) )
2011-12-05 13:49:34 +00:00
else :
l [ ' user ' ] = User . objects . all ( ) . order_by ( ' id ' ) [ 0 ]
2012-01-20 19:38:21 +00:00
l [ ' name ' ] = ox . decodeHtml ( l [ ' name ' ] )
2011-12-04 17:05:48 +00:00
l [ ' created ' ] = datetime . fromtimestamp ( int ( l [ ' created ' ] ) )
l [ ' modified ' ] = datetime . fromtimestamp ( int ( l [ ' modified ' ] ) )
2011-12-05 13:49:34 +00:00
l [ ' alternativeNames ' ] = tuple ( l [ ' alternativeNames ' ] )
l [ ' geoname ' ] = l [ ' name ' ]
2011-12-17 08:35:09 +00:00
l [ ' type ' ] = ' city '
2011-12-05 13:49:34 +00:00
p , c = Place . objects . get_or_create ( name = l [ ' name ' ] )
for key in l :
if key != ' annotations ' :
setattr ( p , key , l [ key ] )
2011-12-04 17:05:48 +00:00
p . save ( )
#FIXME matches
2011-12-25 12:55:02 +00:00
#fixme update links in annotations
2011-12-05 13:49:34 +00:00