From 953d207377e97c73c4adf22d386a76f83737db3c Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 31 Jul 2010 15:13:37 +0200 Subject: [PATCH] cleaner site/archive/file seperation --- .bzrignore | 1 + TODO | 35 +++++++- media/test.js | 16 ++-- oxd.py | 241 +++++++++++++++++++++++++++++++------------------- 4 files changed, 189 insertions(+), 104 deletions(-) diff --git a/.bzrignore b/.bzrignore index 65426bc..79fdb85 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1 +1,2 @@ media/* +dev.sqlite diff --git a/TODO b/TODO index b25c0b4..4c3f71d 100644 --- a/TODO +++ b/TODO @@ -18,11 +18,38 @@ FIREFOX integration: media servers hosting the actuall videos and clients to access them - rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work) + +EXTENSION api: + oxff = OxFF() //is site is not allowed, ask if user wants to allow domaing to use oxff + + oxff.update() //check for new files, would be nicer if that would be somehow automatic and not needed + + oxff.archives() //return list of archive names + + //new archive + archive = oxff.archive('name') + archive.setLocation() //opens file dialog to set location of archive + + //get files + archive.files() + archive.files(since) //unixtimestamp, get new/deleted/... since + + //get info + oxff.get(oshash) //retuns info + //get media + oxff.get(oshash, 'stills') //retuns stills dict or None if not extracted + oxff.get(oshash, '96p.webm') //returns video location or none if not extracted + + //extract media + oxff.extract(oshash, 'stills') + oxff.extract(oshash, '96p.webm') + + //questions + - how to upload media to site? somehow make use of Firefogg + - could some of those requests take to long and require callbacks? + TODO: - add fields: - make archive / file link via id? - - is extracted field enough or requires frames/video thingy + use api like approach as done in pand.do/ra backend code cache location, should cache be inside of archive, home folder or whats a good default. 
must be a config option in diff --git a/media/test.js b/media/test.js index bee0597..5c2be1f 100644 --- a/media/test.js +++ b/media/test.js @@ -91,13 +91,15 @@ $(function(){ $m.append($files); return $m; } - backend.request('files', function(result) { - for_each_sorted(result, function(archive, movies) { - var $archive = $('
'); - $archive.html(archive); - $('#movies').append($archive); - for_each_sorted(movies, function(movie, files) { - $archive.append(addMovie(movie, files)); + backend.request('archives', {'site': '0xdb.org'}, function(result) { + $.each(result.archives, function(archive, path) { + backend.request('files', {'site': '0xdb.org', 'archive': archive}, function(result) { + var $archive = $('
'); + $archive.html(archive); + $('#movies').append($archive); + for_each_sorted(result, function(movie, files) { + $archive.append(addMovie(movie, files)); + }); }); }); }); diff --git a/oxd.py b/oxd.py index 8c26b80..b23bcda 100644 --- a/oxd.py +++ b/oxd.py @@ -173,7 +173,6 @@ def extract_still(video, target, position): shutil.rmtree(framedir) return r == 0 - def extract_video(video, target, profile, info): if not os.path.exists(target): fdir = os.path.dirname(target) @@ -228,8 +227,10 @@ def extract_video(video, target, profile, info): profile_cmd +=['-acodec', 'libvorbis'] aspect = dar.ratio + #use 1:1 pixel aspect ratio if dar is close to that if abs(width/height - dar) < 0.02: aspect = '%s:%s' % (width, height) + cmd = ['./ffmpeg', '-y', '-threads', '2', '-i', video ] + profile_cmd + [ @@ -262,7 +263,7 @@ class ExtractThread(Thread): self.db.extract.task_done() class Database(object): - def __init__(self, conn): + def __init__(self, db_conn): self.extract = Queue.Queue() for i in range(2): @@ -270,16 +271,15 @@ class Database(object): t.setDaemon(True) t.start() - self.db_conn = conn - conn = self.conn() - c = conn.cursor() + self.db_conn = db_conn + conn, c = self.conn() + c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''') if int(self.get('version', 0)) < 1: self.set('version', 1) db = [ '''CREATE TABLE IF NOT EXISTS file ( - archive varchar(1024), path varchar(1024) unique, folder varchar(1024), filename varchar(1024), @@ -292,16 +292,16 @@ class Database(object): created INT, modified INT, deleted INT)''', - '''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''', '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''', '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''', '''CREATE TABLE IF NOT EXISTS archive ( site varchar(1024), - name varchar(1024) unique, - path varchar(1024) unique, + name varchar(1024), + path varchar(1024), updated INT, created INT, - updating INT)''', + updating INT, + 
UNIQUE(site, name))'''
AND site=?', (archive, site)) + prefix = None + for row in c: + prefix = row[0] + if not prefix: + return {} def get_files(files, key, sql, t=()): + t = list(t) + [u"%s%%"%prefix] + c.execute(sql, t) for row in c: - archive = row[0] - folder = row[1] - filename = row[2] - info = json.loads(row[3]) - if not archive in files: files[archive]={} + folder = row[0] + filename = row[1] + info = json.loads(row[2]) if key: - if not key in files[archive]: files[archive][key]={} - if not folder in files[archive][key]: files[archive][key][folder]={} - files[archive][key][folder][filename] = info + if not key in files: files[key]={} + if not folder in files[key]: files[key][folder]={} + files[key][folder][filename] = info else: - if not folder in files[archive]: files[archive][folder]={} - files[archive][folder][filename] = info + if not folder in files: files[folder]={} + files[folder][filename] = info files = {} - sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE ' - sql_postfix = ' deleted < 0 ORDER BY path' + sql_prefix = 'SELECT folder, filename, info FROM file WHERE ' + sql_postfix = ' deleted < 0 AND path LIKE ? ORDER BY path' if since: get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, )) get_files(files, 'modified', @@ -389,9 +390,7 @@ class Database(object): #derivative def derivative(self, oshash, name, status=None): - conn = self.conn() - c = conn.cursor() - + conn, c = self.conn() d = {} d['oshash'] = oshash d['name'] = name @@ -417,8 +416,7 @@ class Database(object): return d def derivatives(self, oshash, status=STATUS_AVAILABLE): - conn = self.conn() - c = conn.cursor() + conn, c = self.conn() derivatives = [] sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?' 
c.execute(sql, (status, oshash)) @@ -427,7 +425,7 @@ class Database(object): return derivatives def extract_derivative(self, oshash, name): - f = self.get_file(oshash) + f = self.file(oshash) derivative = self.derivative(oshash, name) if derivative['status'] == STATUS_NEW: if name.endswith('.png'): @@ -450,16 +448,15 @@ class Database(object): self.derivative(oshash, name, STATUS_FAILED) #archive - def update(self, archive, path, folder, filename): - update = True + def update(self, path, folder, filename): + conn, c = self.conn() + update = True modified = time.mktime(time.localtime()) created = modified - sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?' - conn = self.conn() - c = conn.cursor() - c.execute(sql, (path, )) + sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE deleted < 0 AND path=?' + c.execute(sql, [path]) stat = os.stat(path) for row in c: if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]: @@ -472,59 +469,86 @@ class Database(object): info[key] = getattr(stat, 'st_'+key) oshash = info['oshash'] deleted = -1 - t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, + t = (path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, stat.st_size, json.dumps(info), created, modified, deleted) - c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) + c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) conn.commit() - def spider(self, archive): - path = self.archives()[archive] + def spider(self, path): path = os.path.normpath(path) + files = [] for dirpath, dirnames, filenames in os.walk(path): + if isinstance(dirpath, str): + dirpath = dirpath.decode('utf-8') if filenames: prefix = dirpath[len(path)+1:] for filename in filenames: + if isinstance(filename, str): + filename = filename.decode('utf-8') if not filename.startswith('._') 
and not filename in ('.DS_Store', ): - print dirpath, filename - self.update(archive, os.path.join(dirpath, filename), prefix, filename) + file_path = os.path.join(dirpath, filename) + files.append(file_path) + self.update(file_path, prefix, filename) + + conn, c = self.conn() + c.execute('SELECT path FROM file WHERE path LIKE ? AND deleted < 0', ["%s%%"%path]) + known_files = [r[0] for r in c.fetchall()] + deleted_files = filter(lambda f: f not in files, known_files) + ''' + print 'known' + print json.dumps(known_files, indent=2) + print 'spidered' + print json.dumps(files, indent=2) + ''' + print 'now delete' + print json.dumps(deleted_files, indent=2) + if deleted_files: + deleted = time.mktime(time.localtime()) + for f in deleted_files: + c.execute('UPDATE file SET deleted=? WHERE path=?', (deleted, f)) + conn.commit() def add_archive(self, site, name, path): + conn, c = self.conn() path = os.path.normpath(path) - conn = self.conn() - c = conn.cursor() created = time.mktime(time.localtime()) t = (site, name, path, created, created) + #FIXME: check if site/name exists or deal with error here c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t) conn.commit() - def archives(self): - conn = self.conn() - c = conn.cursor() - sql = 'SELECT name, path FROM archive ORDER BY name'; - c.execute(sql) + def archives(self, site): + conn, c = self.conn() + sql = 'SELECT name, path FROM archive WHERE site=? 
ORDER BY name'; + c.execute(sql, [site]) archives = {} for row in c: archives[row[0]] = row[1] return archives def update_archives(self): - conn = self.conn() - c = conn.cursor() - c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name'); - for row in c: - name = row[0] - c.execute(u'UPDATE archive set updating=1 where name=?', (name, )) + conn, c = self.conn() + c.execute('SELECT path FROM archive WHERE updating = 0 GROUP BY path ORDER BY path') + paths = [r[0] for r in c.fetchall()] + def not_subpath(path): + for p in paths: + if p != path and path.startswith(p): + return False + return True + paths = filter(not_subpath, paths) + for path in paths: + c.execute(u'UPDATE archive SET updating=1 WHERE path LIKE ?', ['%s%%'%path]) conn.commit() - self.spider(name) + self.spider(path) updated = time.mktime(time.localtime()) - c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name)) + c.execute(u'UPDATE archive SET updated=?, updating=0 WHERE path LIKE ?', (updated, '%s%%'%path)) conn.commit() - def remove_archive(self, name): - conn = self.conn() - c = conn.cursor() - c.execute('DELETE FROM archive WHERE path=?', (path, )) - c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, )) + def remove_archive(self, site, name): + conn, c = self.conn() + c.execute('DELETE FROM archive WHERE site=? 
AND name=?', [site, name]) + #fixme, files could be still used by subarchive + #c.execute('DELETE FROM file WHERE path LIKE ?', ["%s%%"%path]) conn.commit() #web @@ -544,7 +568,7 @@ class OxControl(Resource): self.putChild("media", File(self.db.get('media_cache', 'media'))) #FIXME: this is just for debugging - if not 'Test' in self.db.archives(): + if not 'Test' in self.db.archives('0xdb.org'): self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies') def putChild(self, name, child): @@ -557,16 +581,46 @@ class OxControl(Resource): return self def render_GET(self, request): + if request.path == '/add_archive': + args = {} + for arg in ('site', 'name', 'path'): + args[arg] = request.args.get(arg)[0] + self.db.add_archive(**arg) + response = {'status': 'ok'} + return json_response(request, response) + + if request.path == '/remove_archive': + args = {} + for arg in ('site', 'name'): + args[arg] = request.args.get(arg)[0] + self.db.remove_archive(**arg) + response = {'status': 'ok'} + return json_response(request, response) + + if request.path == '/archives': + args = {} + for arg in ['site']: + args[arg] = request.args.get(arg)[0] + response = {} + response['archives'] = self.db.archives(**args) + return json_response(request, response) + if request.path == '/files': """ /files - optional ?since=unixtimestamp - new/modified - files by archive + archive archive name + site site name + since (optional) timestamp, return changes since + files in archive """ - since = request.args.get("since", None) - if since: since = float(since[0]) - files = self.db.files(since) + args = {} + for arg in ['site', 'archive']: + args[arg] = request.args[arg][0] + since = request.args.get("since", [None])[0] + if since: + args['since'] = float(since) + + files = self.db.files(**args) return json_response(request, files) if request.path == '/update': @@ -583,12 +637,12 @@ class OxControl(Resource): extract derivatives from videos """ oshash = request.args.get("oshash", [None])[0] - 
media = request.args.get("media", [None, ])[0] - retry = request.args.get("retry", [None, ])[0] + media = request.args.get("media", [None])[0] + retry = request.args.get("retry", [None])[0] response = {'status': 'not enough data provided'} - f = self.db.get_file(oshash) + f = self.db.file(oshash) if not f: response = {'status': 'unkown oshash'} elif not 'duration' in f['info']: @@ -625,18 +679,19 @@ class OxControl(Resource): if request.path == '/get': """ get information about a file, including derivatives + oshash - oshash of file """ oshash = request.args.get("oshash", [None, ])[0] response = {'status': 'no oshash provided'} if oshash: - f = self.db.get_file(oshash) + f = self.db.file(oshash) response['status'] = 'available' response['info'] = f['info'] files = [f['location'] for f in self.db.derivatives(oshash)] response['video'] = filter(lambda f: f.endswith('.webm'), files) response['stills'] = filter(lambda f: f.endswith('.png'), files) - return json_response(request, response) + return "this is not for humans" if __name__ == '__main__':