cleaner site/archive/file separation

This commit is contained in:
j 2010-07-31 15:13:37 +02:00
parent ccfb0e27f5
commit 953d207377
4 changed files with 189 additions and 104 deletions

View file

@ -1 +1,2 @@
media/* media/*
dev.sqlite

35
TODO
View file

@ -18,11 +18,38 @@ FIREFOX integration:
media servers hosting the actual videos and clients to access them media servers hosting the actual videos and clients to access them
- rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work) - rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work)
EXTENSION api:
oxff = OxFF() //if site is not allowed, ask if user wants to allow domain to use oxff
oxff.update() //check for new files, would be nicer if that would be somehow automatic and not needed
oxff.archives() //return list of archive names
//new archive
archive = oxff.archive('name')
archive.setLocation() //opens file dialog to set location of archive
//get files
archive.files()
archive.files(since) //unixtimestamp, get new/deleted/... since
//get info
oxff.get(oshash) //returns info
//get media
oxff.get(oshash, 'stills') //returns stills dict or None if not extracted
oxff.get(oshash, '96p.webm') //returns video location or none if not extracted
//extract media
oxff.extract(oshash, 'stills')
oxff.extract(oshash, '96p.webm')
//questions
- how to upload media to site? somehow make use of Firefogg
- could some of those requests take too long and require callbacks?
TODO: TODO:
add fields: use api like approach as done in pand.do/ra backend code
make archive / file link via id?
is extracted field enough or requires frames/video thingy
cache location, should cache be inside of archive, home folder or what's a good default. cache location, should cache be inside of archive, home folder or what's a good default.
must be a config option in must be a config option in

View file

@ -91,13 +91,15 @@ $(function(){
$m.append($files); $m.append($files);
return $m; return $m;
} }
backend.request('files', function(result) { backend.request('archives', {'site': '0xdb.org'}, function(result) {
for_each_sorted(result, function(archive, movies) { $.each(result.archives, function(archive, path) {
var $archive = $('<div>'); backend.request('files', {'site': '0xdb.org', 'archive': archive}, function(result) {
$archive.html(archive); var $archive = $('<div>');
$('#movies').append($archive); $archive.html(archive);
for_each_sorted(movies, function(movie, files) { $('#movies').append($archive);
$archive.append(addMovie(movie, files)); for_each_sorted(result, function(movie, files) {
$archive.append(addMovie(movie, files));
});
}); });
}); });
}); });

241
oxd.py
View file

@ -173,7 +173,6 @@ def extract_still(video, target, position):
shutil.rmtree(framedir) shutil.rmtree(framedir)
return r == 0 return r == 0
def extract_video(video, target, profile, info): def extract_video(video, target, profile, info):
if not os.path.exists(target): if not os.path.exists(target):
fdir = os.path.dirname(target) fdir = os.path.dirname(target)
@ -228,8 +227,10 @@ def extract_video(video, target, profile, info):
profile_cmd +=['-acodec', 'libvorbis'] profile_cmd +=['-acodec', 'libvorbis']
aspect = dar.ratio aspect = dar.ratio
#use 1:1 pixel aspect ratio if dar is close to that
if abs(width/height - dar) < 0.02: if abs(width/height - dar) < 0.02:
aspect = '%s:%s' % (width, height) aspect = '%s:%s' % (width, height)
cmd = ['./ffmpeg', '-y', '-threads', '2', cmd = ['./ffmpeg', '-y', '-threads', '2',
'-i', video '-i', video
] + profile_cmd + [ ] + profile_cmd + [
@ -262,7 +263,7 @@ class ExtractThread(Thread):
self.db.extract.task_done() self.db.extract.task_done()
class Database(object): class Database(object):
def __init__(self, conn): def __init__(self, db_conn):
self.extract = Queue.Queue() self.extract = Queue.Queue()
for i in range(2): for i in range(2):
@ -270,16 +271,15 @@ class Database(object):
t.setDaemon(True) t.setDaemon(True)
t.start() t.start()
self.db_conn = conn self.db_conn = db_conn
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''') c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''')
if int(self.get('version', 0)) < 1: if int(self.get('version', 0)) < 1:
self.set('version', 1) self.set('version', 1)
db = [ db = [
'''CREATE TABLE IF NOT EXISTS file ( '''CREATE TABLE IF NOT EXISTS file (
archive varchar(1024),
path varchar(1024) unique, path varchar(1024) unique,
folder varchar(1024), folder varchar(1024),
filename varchar(1024), filename varchar(1024),
@ -292,16 +292,16 @@ class Database(object):
created INT, created INT,
modified INT, modified INT,
deleted INT)''', deleted INT)''',
'''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''',
'''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''', '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''',
'''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''', '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''',
'''CREATE TABLE IF NOT EXISTS archive ( '''CREATE TABLE IF NOT EXISTS archive (
site varchar(1024), site varchar(1024),
name varchar(1024) unique, name varchar(1024),
path varchar(1024) unique, path varchar(1024),
updated INT, updated INT,
created INT, created INT,
updating INT)''', updating INT,
UNIQUE(site, name)))''',
'''CREATE TABLE IF NOT EXISTS derivative ( '''CREATE TABLE IF NOT EXISTS derivative (
oshash varchar(16), oshash varchar(16),
name varchar(1024), name varchar(1024),
@ -311,72 +311,73 @@ class Database(object):
for i in db: for i in db:
c.execute(i) c.execute(i)
c.execute('UPDATE archive set updating=0 WHERE 1=1') c.execute('UPDATE archive set updating=0 WHERE updating!=0')
conn.commit() conn.commit()
def conn(self): def conn(self):
conn = sqlite3.connect(self.db_conn, timeout=10) conn = sqlite3.connect(self.db_conn, timeout=10)
conn.text_factory = str conn.text_factory = sqlite3.OptimizedUnicode
return conn return conn, conn.cursor()
def get(self, key, default=None): def get(self, key, default=None):
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
c.execute('SELECT value FROM setting WHERE key = ?', (key, )) c.execute('SELECT value FROM setting WHERE key = ?', (key, ))
for row in c: for row in c:
return row[0] return row[0]
return default return default
def set(self, key, value): def set(self, key, value):
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value))) c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value)))
conn.commit() conn.commit()
def remove(self, path): def remove_file(self, path):
conn, c = self.conn()
sql = 'DELETE FROM file WHERE path=?' sql = 'DELETE FROM file WHERE path=?'
conn = self.conn()
c = conn.cursor()
c.execute(sql, (path, )) c.execute(sql, (path, ))
conn.commit()
#files #files
def get_file(self, oshash): def file(self, oshash):
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
f = {} f = {}
sql = 'SELECT path, archive, folder, filename, info FROM file WHERE oshash=?' sql = 'SELECT path, folder, filename, info FROM file WHERE oshash=?'
c.execute(sql, (oshash, )) c.execute(sql, (oshash, ))
for row in c: for row in c:
f['path'] = row[0] f['path'] = row[0]
f['archive'] = row[1] f['folder'] = row[1]
f['folder'] = row[2] f['filename'] = row[2]
f['filename'] = row[3] f['info'] = json.loads(row[3])
f['info'] = json.loads(row[4])
break break
return f return f
def files(self, since=None): def files(self, site, archive, since=None):
conn = self.conn() conn, c = self.conn()
c = conn.cursor() c.execute('SELECT path from archive where name=? AND site=?', (archive, site))
prefix = None
for row in c:
prefix = row[0]
if not prefix:
return {}
def get_files(files, key, sql, t=()): def get_files(files, key, sql, t=()):
t = list(t) + [u"%s%%"%prefix]
c.execute(sql, t) c.execute(sql, t)
for row in c: for row in c:
archive = row[0] folder = row[0]
folder = row[1] filename = row[1]
filename = row[2] info = json.loads(row[2])
info = json.loads(row[3])
if not archive in files: files[archive]={}
if key: if key:
if not key in files[archive]: files[archive][key]={} if not key in files: files[key]={}
if not folder in files[archive][key]: files[archive][key][folder]={} if not folder in files[key]: files[key][folder]={}
files[archive][key][folder][filename] = info files[key][folder][filename] = info
else: else:
if not folder in files[archive]: files[archive][folder]={} if not folder in files: files[folder]={}
files[archive][folder][filename] = info files[folder][filename] = info
files = {} files = {}
sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE ' sql_prefix = 'SELECT folder, filename, info FROM file WHERE '
sql_postfix = ' deleted < 0 ORDER BY path' sql_postfix = ' deleted < 0 AND path LIKE ? ORDER BY path'
if since: if since:
get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, )) get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, ))
get_files(files, 'modified', get_files(files, 'modified',
@ -389,9 +390,7 @@ class Database(object):
#derivative #derivative
def derivative(self, oshash, name, status=None): def derivative(self, oshash, name, status=None):
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
d = {} d = {}
d['oshash'] = oshash d['oshash'] = oshash
d['name'] = name d['name'] = name
@ -417,8 +416,7 @@ class Database(object):
return d return d
def derivatives(self, oshash, status=STATUS_AVAILABLE): def derivatives(self, oshash, status=STATUS_AVAILABLE):
conn = self.conn() conn, c = self.conn()
c = conn.cursor()
derivatives = [] derivatives = []
sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?' sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?'
c.execute(sql, (status, oshash)) c.execute(sql, (status, oshash))
@ -427,7 +425,7 @@ class Database(object):
return derivatives return derivatives
def extract_derivative(self, oshash, name): def extract_derivative(self, oshash, name):
f = self.get_file(oshash) f = self.file(oshash)
derivative = self.derivative(oshash, name) derivative = self.derivative(oshash, name)
if derivative['status'] == STATUS_NEW: if derivative['status'] == STATUS_NEW:
if name.endswith('.png'): if name.endswith('.png'):
@ -450,16 +448,15 @@ class Database(object):
self.derivative(oshash, name, STATUS_FAILED) self.derivative(oshash, name, STATUS_FAILED)
#archive #archive
def update(self, archive, path, folder, filename): def update(self, path, folder, filename):
update = True conn, c = self.conn()
update = True
modified = time.mktime(time.localtime()) modified = time.mktime(time.localtime())
created = modified created = modified
sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?' sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE deleted < 0 AND path=?'
conn = self.conn() c.execute(sql, [path])
c = conn.cursor()
c.execute(sql, (path, ))
stat = os.stat(path) stat = os.stat(path)
for row in c: for row in c:
if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]: if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]:
@ -472,59 +469,86 @@ class Database(object):
info[key] = getattr(stat, 'st_'+key) info[key] = getattr(stat, 'st_'+key)
oshash = info['oshash'] oshash = info['oshash']
deleted = -1 deleted = -1
t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, t = (path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
stat.st_size, json.dumps(info), created, modified, deleted) stat.st_size, json.dumps(info), created, modified, deleted)
c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t)
conn.commit() conn.commit()
def spider(self, archive): def spider(self, path):
path = self.archives()[archive]
path = os.path.normpath(path) path = os.path.normpath(path)
files = []
for dirpath, dirnames, filenames in os.walk(path): for dirpath, dirnames, filenames in os.walk(path):
if isinstance(dirpath, str):
dirpath = dirpath.decode('utf-8')
if filenames: if filenames:
prefix = dirpath[len(path)+1:] prefix = dirpath[len(path)+1:]
for filename in filenames: for filename in filenames:
if isinstance(filename, str):
filename = filename.decode('utf-8')
if not filename.startswith('._') and not filename in ('.DS_Store', ): if not filename.startswith('._') and not filename in ('.DS_Store', ):
print dirpath, filename file_path = os.path.join(dirpath, filename)
self.update(archive, os.path.join(dirpath, filename), prefix, filename) files.append(file_path)
self.update(file_path, prefix, filename)
conn, c = self.conn()
c.execute('SELECT path FROM file WHERE path LIKE ? AND deleted < 0', ["%s%%"%path])
known_files = [r[0] for r in c.fetchall()]
deleted_files = filter(lambda f: f not in files, known_files)
'''
print 'known'
print json.dumps(known_files, indent=2)
print 'spidered'
print json.dumps(files, indent=2)
'''
print 'now delete'
print json.dumps(deleted_files, indent=2)
if deleted_files:
deleted = time.mktime(time.localtime())
for f in deleted_files:
c.execute('UPDATE file SET deleted=? WHERE path=?', (deleted, f))
conn.commit()
def add_archive(self, site, name, path): def add_archive(self, site, name, path):
conn, c = self.conn()
path = os.path.normpath(path) path = os.path.normpath(path)
conn = self.conn()
c = conn.cursor()
created = time.mktime(time.localtime()) created = time.mktime(time.localtime())
t = (site, name, path, created, created) t = (site, name, path, created, created)
#FIXME: check if site/name exists or deal with error here
c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t) c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t)
conn.commit() conn.commit()
def archives(self): def archives(self, site):
conn = self.conn() conn, c = self.conn()
c = conn.cursor() sql = 'SELECT name, path FROM archive WHERE site=? ORDER BY name';
sql = 'SELECT name, path FROM archive ORDER BY name'; c.execute(sql, [site])
c.execute(sql)
archives = {} archives = {}
for row in c: for row in c:
archives[row[0]] = row[1] archives[row[0]] = row[1]
return archives return archives
def update_archives(self): def update_archives(self):
conn = self.conn() conn, c = self.conn()
c = conn.cursor() c.execute('SELECT path FROM archive WHERE updating = 0 GROUP BY path ORDER BY path')
c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name'); paths = [r[0] for r in c.fetchall()]
for row in c: def not_subpath(path):
name = row[0] for p in paths:
c.execute(u'UPDATE archive set updating=1 where name=?', (name, )) if p != path and path.startswith(p):
return False
return True
paths = filter(not_subpath, paths)
for path in paths:
c.execute(u'UPDATE archive SET updating=1 WHERE path LIKE ?', ['%s%%'%path])
conn.commit() conn.commit()
self.spider(name) self.spider(path)
updated = time.mktime(time.localtime()) updated = time.mktime(time.localtime())
c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name)) c.execute(u'UPDATE archive SET updated=?, updating=0 WHERE path LIKE ?', (updated, '%s%%'%path))
conn.commit() conn.commit()
def remove_archive(self, name): def remove_archive(self, site, name):
conn = self.conn() conn, c = self.conn()
c = conn.cursor() c.execute('DELETE FROM archive WHERE site=? AND name=?', [site, name])
c.execute('DELETE FROM archive WHERE path=?', (path, )) #fixme, files could be still used by subarchive
c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, )) #c.execute('DELETE FROM file WHERE path LIKE ?', ["%s%%"%path])
conn.commit() conn.commit()
#web #web
@ -544,7 +568,7 @@ class OxControl(Resource):
self.putChild("media", File(self.db.get('media_cache', 'media'))) self.putChild("media", File(self.db.get('media_cache', 'media')))
#FIXME: this is just for debugging #FIXME: this is just for debugging
if not 'Test' in self.db.archives(): if not 'Test' in self.db.archives('0xdb.org'):
self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies') self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies')
def putChild(self, name, child): def putChild(self, name, child):
@ -557,16 +581,46 @@ class OxControl(Resource):
return self return self
def render_GET(self, request): def render_GET(self, request):
if request.path == '/add_archive':
args = {}
for arg in ('site', 'name', 'path'):
args[arg] = request.args.get(arg)[0]
self.db.add_archive(**arg)
response = {'status': 'ok'}
return json_response(request, response)
if request.path == '/remove_archive':
args = {}
for arg in ('site', 'name'):
args[arg] = request.args.get(arg)[0]
self.db.remove_archive(**arg)
response = {'status': 'ok'}
return json_response(request, response)
if request.path == '/archives':
args = {}
for arg in ['site']:
args[arg] = request.args.get(arg)[0]
response = {}
response['archives'] = self.db.archives(**args)
return json_response(request, response)
if request.path == '/files': if request.path == '/files':
""" """
/files /files
optional ?since=unixtimestamp archive archive name
new/modified site site name
files by archive since (optional) timestamp, return changes since
files in archive
""" """
since = request.args.get("since", None) args = {}
if since: since = float(since[0]) for arg in ['site', 'archive']:
files = self.db.files(since) args[arg] = request.args[arg][0]
since = request.args.get("since", [None])[0]
if since:
args['since'] = float(since)
files = self.db.files(**args)
return json_response(request, files) return json_response(request, files)
if request.path == '/update': if request.path == '/update':
@ -583,12 +637,12 @@ class OxControl(Resource):
extract derivatives from videos extract derivatives from videos
""" """
oshash = request.args.get("oshash", [None])[0] oshash = request.args.get("oshash", [None])[0]
media = request.args.get("media", [None, ])[0] media = request.args.get("media", [None])[0]
retry = request.args.get("retry", [None, ])[0] retry = request.args.get("retry", [None])[0]
response = {'status': 'not enough data provided'} response = {'status': 'not enough data provided'}
f = self.db.get_file(oshash) f = self.db.file(oshash)
if not f: if not f:
response = {'status': 'unkown oshash'} response = {'status': 'unkown oshash'}
elif not 'duration' in f['info']: elif not 'duration' in f['info']:
@ -625,18 +679,19 @@ class OxControl(Resource):
if request.path == '/get': if request.path == '/get':
""" """
get information about a file, including derivatives get information about a file, including derivatives
oshash - oshash of file
""" """
oshash = request.args.get("oshash", [None, ])[0] oshash = request.args.get("oshash", [None, ])[0]
response = {'status': 'no oshash provided'} response = {'status': 'no oshash provided'}
if oshash: if oshash:
f = self.db.get_file(oshash) f = self.db.file(oshash)
response['status'] = 'available' response['status'] = 'available'
response['info'] = f['info'] response['info'] = f['info']
files = [f['location'] for f in self.db.derivatives(oshash)] files = [f['location'] for f in self.db.derivatives(oshash)]
response['video'] = filter(lambda f: f.endswith('.webm'), files) response['video'] = filter(lambda f: f.endswith('.webm'), files)
response['stills'] = filter(lambda f: f.endswith('.png'), files) response['stills'] = filter(lambda f: f.endswith('.png'), files)
return json_response(request, response) return json_response(request, response)
return "<!DOCTYPE html><html>this is not for humans</html>" return "<!DOCTYPE html><html>this is not for humans</html>"
if __name__ == '__main__': if __name__ == '__main__':