cleaner site/archive/file separation

This commit is contained in:
j 2010-07-31 15:13:37 +02:00
parent ccfb0e27f5
commit 953d207377
4 changed files with 189 additions and 104 deletions

View file

@ -1 +1,2 @@
media/*
dev.sqlite

35
TODO
View file

@ -18,11 +18,38 @@ FIREFOX integration:
media servers hosting the actual videos and clients to access them
- rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work)
TODO:
add fields:
make archive / file link via id?
is extracted field enough or requires frames/video thingy
EXTENSION api:
oxff = OxFF() //if site is not allowed, ask if user wants to allow domain to use oxff
oxff.update() //check for new files, would be nicer if that would be somehow automatic and not needed
oxff.archives() //return list of archive names
//new archive
archive = oxff.archive('name')
archive.setLocation() //opens file dialog to set location of archive
//get files
archive.files()
archive.files(since) //unixtimestamp, get new/deleted/... since
//get info
oxff.get(oshash) //returns info
//get media
oxff.get(oshash, 'stills') //returns stills dict or None if not extracted
oxff.get(oshash, '96p.webm') //returns video location or none if not extracted
//extract media
oxff.extract(oshash, 'stills')
oxff.extract(oshash, '96p.webm')
//questions
- how to upload media to site? somehow make use of Firefogg
- could some of those requests take too long and require callbacks?
TODO:
use api like approach as done in pand.do/ra backend code
cache location: should cache be inside of archive or home folder, or what's a good default?
must be a config option in

View file

@ -91,13 +91,15 @@ $(function(){
$m.append($files);
return $m;
}
backend.request('files', function(result) {
for_each_sorted(result, function(archive, movies) {
var $archive = $('<div>');
$archive.html(archive);
$('#movies').append($archive);
for_each_sorted(movies, function(movie, files) {
$archive.append(addMovie(movie, files));
backend.request('archives', {'site': '0xdb.org'}, function(result) {
$.each(result.archives, function(archive, path) {
backend.request('files', {'site': '0xdb.org', 'archive': archive}, function(result) {
var $archive = $('<div>');
$archive.html(archive);
$('#movies').append($archive);
for_each_sorted(result, function(movie, files) {
$archive.append(addMovie(movie, files));
});
});
});
});

241
oxd.py
View file

@ -173,7 +173,6 @@ def extract_still(video, target, position):
shutil.rmtree(framedir)
return r == 0
def extract_video(video, target, profile, info):
if not os.path.exists(target):
fdir = os.path.dirname(target)
@ -228,8 +227,10 @@ def extract_video(video, target, profile, info):
profile_cmd +=['-acodec', 'libvorbis']
aspect = dar.ratio
#use 1:1 pixel aspect ratio if dar is close to that
if abs(width/height - dar) < 0.02:
aspect = '%s:%s' % (width, height)
cmd = ['./ffmpeg', '-y', '-threads', '2',
'-i', video
] + profile_cmd + [
@ -262,7 +263,7 @@ class ExtractThread(Thread):
self.db.extract.task_done()
class Database(object):
def __init__(self, conn):
def __init__(self, db_conn):
self.extract = Queue.Queue()
for i in range(2):
@ -270,16 +271,15 @@ class Database(object):
t.setDaemon(True)
t.start()
self.db_conn = conn
conn = self.conn()
c = conn.cursor()
self.db_conn = db_conn
conn, c = self.conn()
c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''')
if int(self.get('version', 0)) < 1:
self.set('version', 1)
db = [
'''CREATE TABLE IF NOT EXISTS file (
archive varchar(1024),
path varchar(1024) unique,
folder varchar(1024),
filename varchar(1024),
@ -292,16 +292,16 @@ class Database(object):
created INT,
modified INT,
deleted INT)''',
'''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''',
'''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''',
'''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''',
'''CREATE TABLE IF NOT EXISTS archive (
site varchar(1024),
name varchar(1024) unique,
path varchar(1024) unique,
name varchar(1024),
path varchar(1024),
updated INT,
created INT,
updating INT)''',
updating INT,
UNIQUE(site, name)))''',
'''CREATE TABLE IF NOT EXISTS derivative (
oshash varchar(16),
name varchar(1024),
@ -311,72 +311,73 @@ class Database(object):
for i in db:
c.execute(i)
c.execute('UPDATE archive set updating=0 WHERE 1=1')
c.execute('UPDATE archive set updating=0 WHERE updating!=0')
conn.commit()
def conn(self):
conn = sqlite3.connect(self.db_conn, timeout=10)
conn.text_factory = str
return conn
conn.text_factory = sqlite3.OptimizedUnicode
return conn, conn.cursor()
def get(self, key, default=None):
conn = self.conn()
c = conn.cursor()
conn, c = self.conn()
c.execute('SELECT value FROM setting WHERE key = ?', (key, ))
for row in c:
return row[0]
return default
def set(self, key, value):
conn = self.conn()
c = conn.cursor()
conn, c = self.conn()
c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value)))
conn.commit()
def remove(self, path):
def remove_file(self, path):
conn, c = self.conn()
sql = 'DELETE FROM file WHERE path=?'
conn = self.conn()
c = conn.cursor()
c.execute(sql, (path, ))
conn.commit()
#files
def get_file(self, oshash):
conn = self.conn()
c = conn.cursor()
def file(self, oshash):
conn, c = self.conn()
f = {}
sql = 'SELECT path, archive, folder, filename, info FROM file WHERE oshash=?'
sql = 'SELECT path, folder, filename, info FROM file WHERE oshash=?'
c.execute(sql, (oshash, ))
for row in c:
f['path'] = row[0]
f['archive'] = row[1]
f['folder'] = row[2]
f['filename'] = row[3]
f['info'] = json.loads(row[4])
f['folder'] = row[1]
f['filename'] = row[2]
f['info'] = json.loads(row[3])
break
return f
def files(self, since=None):
conn = self.conn()
c = conn.cursor()
def files(self, site, archive, since=None):
conn, c = self.conn()
c.execute('SELECT path from archive where name=? AND site=?', (archive, site))
prefix = None
for row in c:
prefix = row[0]
if not prefix:
return {}
def get_files(files, key, sql, t=()):
t = list(t) + [u"%s%%"%prefix]
c.execute(sql, t)
for row in c:
archive = row[0]
folder = row[1]
filename = row[2]
info = json.loads(row[3])
if not archive in files: files[archive]={}
folder = row[0]
filename = row[1]
info = json.loads(row[2])
if key:
if not key in files[archive]: files[archive][key]={}
if not folder in files[archive][key]: files[archive][key][folder]={}
files[archive][key][folder][filename] = info
if not key in files: files[key]={}
if not folder in files[key]: files[key][folder]={}
files[key][folder][filename] = info
else:
if not folder in files[archive]: files[archive][folder]={}
files[archive][folder][filename] = info
if not folder in files: files[folder]={}
files[folder][filename] = info
files = {}
sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE '
sql_postfix = ' deleted < 0 ORDER BY path'
sql_prefix = 'SELECT folder, filename, info FROM file WHERE '
sql_postfix = ' deleted < 0 AND path LIKE ? ORDER BY path'
if since:
get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, ))
get_files(files, 'modified',
@ -389,9 +390,7 @@ class Database(object):
#derivative
def derivative(self, oshash, name, status=None):
conn = self.conn()
c = conn.cursor()
conn, c = self.conn()
d = {}
d['oshash'] = oshash
d['name'] = name
@ -417,8 +416,7 @@ class Database(object):
return d
def derivatives(self, oshash, status=STATUS_AVAILABLE):
conn = self.conn()
c = conn.cursor()
conn, c = self.conn()
derivatives = []
sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?'
c.execute(sql, (status, oshash))
@ -427,7 +425,7 @@ class Database(object):
return derivatives
def extract_derivative(self, oshash, name):
f = self.get_file(oshash)
f = self.file(oshash)
derivative = self.derivative(oshash, name)
if derivative['status'] == STATUS_NEW:
if name.endswith('.png'):
@ -450,16 +448,15 @@ class Database(object):
self.derivative(oshash, name, STATUS_FAILED)
#archive
def update(self, archive, path, folder, filename):
update = True
def update(self, path, folder, filename):
conn, c = self.conn()
update = True
modified = time.mktime(time.localtime())
created = modified
sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?'
conn = self.conn()
c = conn.cursor()
c.execute(sql, (path, ))
sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE deleted < 0 AND path=?'
c.execute(sql, [path])
stat = os.stat(path)
for row in c:
if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]:
@ -472,59 +469,86 @@ class Database(object):
info[key] = getattr(stat, 'st_'+key)
oshash = info['oshash']
deleted = -1
t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
t = (path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
stat.st_size, json.dumps(info), created, modified, deleted)
c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t)
c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t)
conn.commit()
def spider(self, archive):
path = self.archives()[archive]
def spider(self, path):
path = os.path.normpath(path)
files = []
for dirpath, dirnames, filenames in os.walk(path):
if isinstance(dirpath, str):
dirpath = dirpath.decode('utf-8')
if filenames:
prefix = dirpath[len(path)+1:]
for filename in filenames:
if isinstance(filename, str):
filename = filename.decode('utf-8')
if not filename.startswith('._') and not filename in ('.DS_Store', ):
print dirpath, filename
self.update(archive, os.path.join(dirpath, filename), prefix, filename)
file_path = os.path.join(dirpath, filename)
files.append(file_path)
self.update(file_path, prefix, filename)
conn, c = self.conn()
c.execute('SELECT path FROM file WHERE path LIKE ? AND deleted < 0', ["%s%%"%path])
known_files = [r[0] for r in c.fetchall()]
deleted_files = filter(lambda f: f not in files, known_files)
'''
print 'known'
print json.dumps(known_files, indent=2)
print 'spidered'
print json.dumps(files, indent=2)
'''
print 'now delete'
print json.dumps(deleted_files, indent=2)
if deleted_files:
deleted = time.mktime(time.localtime())
for f in deleted_files:
c.execute('UPDATE file SET deleted=? WHERE path=?', (deleted, f))
conn.commit()
def add_archive(self, site, name, path):
conn, c = self.conn()
path = os.path.normpath(path)
conn = self.conn()
c = conn.cursor()
created = time.mktime(time.localtime())
t = (site, name, path, created, created)
#FIXME: check if site/name exists or deal with error here
c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t)
conn.commit()
def archives(self):
conn = self.conn()
c = conn.cursor()
sql = 'SELECT name, path FROM archive ORDER BY name';
c.execute(sql)
def archives(self, site):
conn, c = self.conn()
sql = 'SELECT name, path FROM archive WHERE site=? ORDER BY name';
c.execute(sql, [site])
archives = {}
for row in c:
archives[row[0]] = row[1]
return archives
def update_archives(self):
conn = self.conn()
c = conn.cursor()
c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name');
for row in c:
name = row[0]
c.execute(u'UPDATE archive set updating=1 where name=?', (name, ))
conn, c = self.conn()
c.execute('SELECT path FROM archive WHERE updating = 0 GROUP BY path ORDER BY path')
paths = [r[0] for r in c.fetchall()]
def not_subpath(path):
for p in paths:
if p != path and path.startswith(p):
return False
return True
paths = filter(not_subpath, paths)
for path in paths:
c.execute(u'UPDATE archive SET updating=1 WHERE path LIKE ?', ['%s%%'%path])
conn.commit()
self.spider(name)
self.spider(path)
updated = time.mktime(time.localtime())
c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name))
c.execute(u'UPDATE archive SET updated=?, updating=0 WHERE path LIKE ?', (updated, '%s%%'%path))
conn.commit()
def remove_archive(self, name):
conn = self.conn()
c = conn.cursor()
c.execute('DELETE FROM archive WHERE path=?', (path, ))
c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, ))
def remove_archive(self, site, name):
conn, c = self.conn()
c.execute('DELETE FROM archive WHERE site=? AND name=?', [site, name])
#fixme, files could be still used by subarchive
#c.execute('DELETE FROM file WHERE path LIKE ?', ["%s%%"%path])
conn.commit()
#web
@ -544,7 +568,7 @@ class OxControl(Resource):
self.putChild("media", File(self.db.get('media_cache', 'media')))
#FIXME: this is just for debugging
if not 'Test' in self.db.archives():
if not 'Test' in self.db.archives('0xdb.org'):
self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies')
def putChild(self, name, child):
@ -557,16 +581,46 @@ class OxControl(Resource):
return self
def render_GET(self, request):
if request.path == '/add_archive':
args = {}
for arg in ('site', 'name', 'path'):
args[arg] = request.args.get(arg)[0]
self.db.add_archive(**arg)
response = {'status': 'ok'}
return json_response(request, response)
if request.path == '/remove_archive':
args = {}
for arg in ('site', 'name'):
args[arg] = request.args.get(arg)[0]
self.db.remove_archive(**arg)
response = {'status': 'ok'}
return json_response(request, response)
if request.path == '/archives':
args = {}
for arg in ['site']:
args[arg] = request.args.get(arg)[0]
response = {}
response['archives'] = self.db.archives(**args)
return json_response(request, response)
if request.path == '/files':
"""
/files
optional ?since=unixtimestamp
new/modified
files by archive
archive archive name
site site name
since (optional) timestamp, return changes since
files in archive
"""
since = request.args.get("since", None)
if since: since = float(since[0])
files = self.db.files(since)
args = {}
for arg in ['site', 'archive']:
args[arg] = request.args[arg][0]
since = request.args.get("since", [None])[0]
if since:
args['since'] = float(since)
files = self.db.files(**args)
return json_response(request, files)
if request.path == '/update':
@ -583,12 +637,12 @@ class OxControl(Resource):
extract derivatives from videos
"""
oshash = request.args.get("oshash", [None])[0]
media = request.args.get("media", [None, ])[0]
retry = request.args.get("retry", [None, ])[0]
media = request.args.get("media", [None])[0]
retry = request.args.get("retry", [None])[0]
response = {'status': 'not enough data provided'}
f = self.db.get_file(oshash)
f = self.db.file(oshash)
if not f:
response = {'status': 'unkown oshash'}
elif not 'duration' in f['info']:
@ -625,18 +679,19 @@ class OxControl(Resource):
if request.path == '/get':
"""
get information about a file, including derivatives
oshash - oshash of file
"""
oshash = request.args.get("oshash", [None, ])[0]
response = {'status': 'no oshash provided'}
if oshash:
f = self.db.get_file(oshash)
f = self.db.file(oshash)
response['status'] = 'available'
response['info'] = f['info']
files = [f['location'] for f in self.db.derivatives(oshash)]
response['video'] = filter(lambda f: f.endswith('.webm'), files)
response['stills'] = filter(lambda f: f.endswith('.png'), files)
return json_response(request, response)
return "<!DOCTYPE html><html>this is not for humans</html>"
if __name__ == '__main__':