cleaner site/archive/file separation

2010-07-31 15:13:37 +02:00 · 2010-07-31 15:13:37 +02:00 · 953d207377
commit 953d207377
parent ccfb0e27f5
4 changed files with 189 additions and 104 deletions
--- a/.bzrignore
+++ b/.bzrignore
@ -1 +1,2 @@
 media/*
+dev.sqlite
--- a/35
+++ b/35
@ -18,11 +18,38 @@ FIREFOX integration:
       media servers hosting the actual videos and clients to access them
     - rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work)

+
+EXTENSION api:
+    oxff = OxFF() //if site is not allowed, ask if user wants to allow domain to use oxff
+
+    oxff.update() //check for new files, would be nicer if that would be somehow automatic and not needed
+
+    oxff.archives() //return list of archive names
+
+    //new archive
+    archive = oxff.archive('name')
+    archive.setLocation() //opens file dialog to set location of archive
+
+    //get files
+    archive.files()
+    archive.files(since) //unixtimestamp, get new/deleted/... since
+
+    //get info
+    oxff.get(oshash) //returns info
+    //get media
+    oxff.get(oshash, 'stills')   //returns stills dict or None if not extracted
+    oxff.get(oshash, '96p.webm') //returns video location or None if not extracted
+
+    //extract media
+    oxff.extract(oshash, 'stills')
+    oxff.extract(oshash, '96p.webm')
+
+    //questions
+        - how to upload media to site? somehow make use of Firefogg
+        - could some of those requests take too long and require callbacks?
+
 TODO:
-    add fields:
-        make archive / file link via id?
-        
-        is extracted field enough or requires frames/video thingy
+    use an API-like approach, as done in the pan.do/ra backend code

    cache location, should cache be inside of archive, home folder or whats a good default.
    must be a config option in
--- a/media/test.js
+++ b/media/test.js
@ -91,13 +91,15 @@ $(function(){
        $m.append($files);
        return $m;
    }
-    backend.request('files', function(result) {
-        for_each_sorted(result, function(archive, movies) {
-            var $archive = $('<div>');
-            $archive.html(archive);
-            $('#movies').append($archive);
-            for_each_sorted(movies, function(movie, files) {
-                $archive.append(addMovie(movie, files));
+    backend.request('archives', {'site': '0xdb.org'}, function(result) {
+        $.each(result.archives, function(archive, path) {
+            backend.request('files', {'site': '0xdb.org', 'archive': archive}, function(result) {
+                var $archive = $('<div>');
+                $archive.html(archive);
+                $('#movies').append($archive);
+                for_each_sorted(result, function(movie, files) {
+                    $archive.append(addMovie(movie, files));
+                });
            });
        });
    });
--- a/oxd.py
+++ b/oxd.py
@ -173,7 +173,6 @@ def extract_still(video, target, position):
    shutil.rmtree(framedir)
    return r == 0

-
 def extract_video(video, target, profile, info):
    if not os.path.exists(target):
        fdir = os.path.dirname(target)
@ -228,8 +227,10 @@ def extract_video(video, target, profile, info):
        profile_cmd +=['-acodec', 'libvorbis']

    aspect = dar.ratio
+    #use 1:1 pixel aspect ratio if dar is close to that
    if abs(width/height - dar) < 0.02:
        aspect = '%s:%s' % (width, height)
+
    cmd = ['./ffmpeg', '-y', '-threads', '2',
        '-i', video
        ] + profile_cmd + [
@ -262,7 +263,7 @@ class ExtractThread(Thread):
            self.db.extract.task_done()

 class Database(object):
-    def __init__(self, conn):
+    def __init__(self, db_conn):

        self.extract = Queue.Queue()
        for i in range(2):
@ -270,16 +271,15 @@ class Database(object):
          t.setDaemon(True)
          t.start()

-        self.db_conn = conn
-        conn = self.conn()
-        c = conn.cursor()
+        self.db_conn = db_conn
+        conn, c = self.conn()
+
        c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''')

        if int(self.get('version', 0)) < 1:
            self.set('version', 1)
            db = [
                '''CREATE TABLE IF NOT EXISTS file (
-                                archive varchar(1024),
                                path varchar(1024) unique,
                                folder varchar(1024),
                                filename varchar(1024),
@ -292,16 +292,16 @@ class Database(object):
                                created INT,
                                modified INT,
                                deleted INT)''',
-                '''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''',
                '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''',
                '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''',
                '''CREATE TABLE IF NOT EXISTS archive (
                                site varchar(1024),
-                                name varchar(1024) unique,
-                                path varchar(1024) unique,
+                                name varchar(1024),
+                                path varchar(1024),
                                updated INT,
                                created INT,
-                                updating INT)''',
+                                updating INT,
+                                UNIQUE(site, name)))''',
                '''CREATE TABLE IF NOT EXISTS derivative (
                                oshash varchar(16),
                                name varchar(1024),
@ -311,72 +311,73 @@ class Database(object):
            for i in db:
                c.execute(i)

-        c.execute('UPDATE archive set updating=0 WHERE 1=1')
+        c.execute('UPDATE archive set updating=0 WHERE updating!=0')
        conn.commit()

    def conn(self):
        conn = sqlite3.connect(self.db_conn, timeout=10)
-        conn.text_factory = str
-        return conn
+        conn.text_factory = sqlite3.OptimizedUnicode
+        return conn, conn.cursor()

    def get(self, key, default=None):
-        conn = self.conn()
-        c = conn.cursor()
+        conn, c = self.conn()
        c.execute('SELECT value FROM setting WHERE key = ?', (key, ))
        for row in c:
            return row[0]
        return default
    
    def set(self, key, value):
-        conn = self.conn()
-        c = conn.cursor()
+        conn, c = self.conn()
        c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value)))
        conn.commit()

-    def remove(self, path):
+    def remove_file(self, path):
+        conn, c = self.conn()
        sql = 'DELETE FROM file WHERE path=?'
-        conn = self.conn()
-        c = conn.cursor()
        c.execute(sql, (path, ))
+        conn.commit()
    
    #files
-    def get_file(self, oshash):
-        conn = self.conn()
-        c = conn.cursor()
+    def file(self, oshash):
+        conn, c = self.conn()
        f = {}
-        sql = 'SELECT path, archive, folder, filename, info FROM file WHERE oshash=?'
+        sql = 'SELECT path, folder, filename, info FROM file WHERE oshash=?'
        c.execute(sql, (oshash, ))
        for row in c:
            f['path'] = row[0]
-            f['archive'] = row[1]
-            f['folder'] = row[2]
-            f['filename'] = row[3]
-            f['info'] = json.loads(row[4])
+            f['folder'] = row[1]
+            f['filename'] = row[2]
+            f['info'] = json.loads(row[3])
            break
        return f

-    def files(self, since=None):
-        conn = self.conn()
-        c = conn.cursor()
+    def files(self, site, archive, since=None):
+        conn, c = self.conn()
+        c.execute('SELECT path from archive where name=? AND site=?', (archive, site))
+        prefix = None
+        for row in c:
+            prefix = row[0]
+        if not prefix:
+            return {}

        def get_files(files, key, sql, t=()):
+            t = list(t) + [u"%s%%"%prefix]
+
            c.execute(sql, t)
            for row in c:
-                archive = row[0]
-                folder = row[1]
-                filename = row[2]
-                info = json.loads(row[3])
-                if not archive in files: files[archive]={}
+                folder = row[0]
+                filename = row[1]
+                info = json.loads(row[2])
                if key:
-                    if not key in files[archive]: files[archive][key]={}
-                    if not folder in files[archive][key]: files[archive][key][folder]={}
-                    files[archive][key][folder][filename] = info
+                    if not key in files: files[key]={}
+                    if not folder in files[key]: files[key][folder]={}
+                    files[key][folder][filename] = info
                else:
-                    if not folder in files[archive]: files[archive][folder]={}
-                    files[archive][folder][filename] = info
+                    if not folder in files: files[folder]={}
+                    files[folder][filename] = info
        files = {}
-        sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE '
-        sql_postfix = ' deleted < 0 ORDER BY path'
+        sql_prefix = 'SELECT folder, filename, info FROM file WHERE '
+        sql_postfix = ' deleted < 0 AND path LIKE ? ORDER BY path'
        if since:
            get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, ))
            get_files(files, 'modified',
@ -389,9 +390,7 @@ class Database(object):

    #derivative
    def derivative(self, oshash, name, status=None):
-        conn = self.conn()
-        c = conn.cursor()
-
+        conn, c = self.conn()
        d = {}
        d['oshash'] = oshash
        d['name'] = name
@ -417,8 +416,7 @@ class Database(object):
        return d

    def derivatives(self, oshash, status=STATUS_AVAILABLE):
-        conn = self.conn()
-        c = conn.cursor()
+        conn, c = self.conn()
        derivatives = []
        sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?'
        c.execute(sql, (status, oshash))
@ -427,7 +425,7 @@ class Database(object):
        return derivatives

    def extract_derivative(self, oshash, name):
-        f = self.get_file(oshash)
+        f = self.file(oshash)
        derivative = self.derivative(oshash, name)
        if derivative['status'] == STATUS_NEW:
            if name.endswith('.png'):
@ -450,16 +448,15 @@ class Database(object):
                    self.derivative(oshash, name, STATUS_FAILED)

    #archive
-    def update(self, archive, path, folder, filename):
-        update = True
+    def update(self, path, folder, filename):
+        conn, c = self.conn()

+        update = True
        modified = time.mktime(time.localtime())
        created = modified

-        sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?'
-        conn = self.conn()
-        c = conn.cursor()
-        c.execute(sql, (path, ))
+        sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE deleted < 0 AND path=?'
+        c.execute(sql, [path])
        stat = os.stat(path)
        for row in c:
            if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]:
@ -472,59 +469,86 @@ class Database(object):
                info[key] = getattr(stat, 'st_'+key)
            oshash = info['oshash']
            deleted = -1
-            t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
+            t = (path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
                 stat.st_size, json.dumps(info), created, modified, deleted)
-            c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t)
+            c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t)
            conn.commit()

-    def spider(self, archive):
-        path = self.archives()[archive]
+    def spider(self, path):
        path = os.path.normpath(path)
+        files = []
        for dirpath, dirnames, filenames in os.walk(path):
+            if isinstance(dirpath, str):
+                dirpath = dirpath.decode('utf-8')
            if filenames:
                prefix = dirpath[len(path)+1:]
                for filename in filenames:
+                    if isinstance(filename, str):
+                        filename = filename.decode('utf-8')
                    if not filename.startswith('._') and not filename in ('.DS_Store', ):
-                        print dirpath, filename
-                        self.update(archive, os.path.join(dirpath, filename), prefix, filename)
+                        file_path = os.path.join(dirpath, filename)
+                        files.append(file_path)
+                        self.update(file_path, prefix, filename)
+
+        conn, c = self.conn()
+        c.execute('SELECT path FROM file WHERE path LIKE ? AND deleted < 0', ["%s%%"%path])
+        known_files = [r[0] for r in c.fetchall()]
+        deleted_files = filter(lambda f: f not in files, known_files)
+        '''
+        print 'known'
+        print json.dumps(known_files, indent=2)
+        print 'spidered'
+        print json.dumps(files, indent=2)
+        '''
+        print 'now delete'
+        print json.dumps(deleted_files, indent=2)
+        if deleted_files:
+            deleted = time.mktime(time.localtime())
+            for f in deleted_files:
+                c.execute('UPDATE file SET deleted=? WHERE path=?', (deleted, f))
+            conn.commit()

    def add_archive(self, site, name, path):
+        conn, c = self.conn()
        path = os.path.normpath(path)
-        conn = self.conn()
-        c = conn.cursor()
        created = time.mktime(time.localtime())
        t = (site, name, path, created, created)
+        #FIXME: check if site/name exists or deal with error here
        c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t)
        conn.commit()

-    def archives(self):
-        conn = self.conn()
-        c = conn.cursor()
-        sql = 'SELECT name, path FROM archive ORDER BY name';
-        c.execute(sql)
+    def archives(self, site):
+        conn, c = self.conn()
+        sql = 'SELECT name, path FROM archive WHERE site=? ORDER BY name';
+        c.execute(sql, [site])
        archives = {}
        for row in c:
            archives[row[0]] = row[1]
        return archives

    def update_archives(self):
-        conn = self.conn()
-        c = conn.cursor()
-        c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name');
-        for row in c:
-            name = row[0]
-            c.execute(u'UPDATE archive set updating=1 where name=?', (name, ))
+        conn, c = self.conn()
+        c.execute('SELECT path FROM archive WHERE updating = 0 GROUP BY path ORDER BY path')
+        paths = [r[0] for r in c.fetchall()]
+        def not_subpath(path):
+            for p in paths:
+                if p != path and path.startswith(p):
+                    return False
+            return True
+        paths = filter(not_subpath, paths)
+        for path in paths:
+            c.execute(u'UPDATE archive SET updating=1 WHERE path LIKE ?', ['%s%%'%path])
            conn.commit()
-            self.spider(name)
+            self.spider(path)
            updated = time.mktime(time.localtime())
-            c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name))
+            c.execute(u'UPDATE archive SET updated=?, updating=0 WHERE path LIKE ?', (updated, '%s%%'%path))
            conn.commit()

-    def remove_archive(self, name):
-        conn = self.conn()
-        c = conn.cursor()
-        c.execute('DELETE FROM archive WHERE path=?', (path, ))
-        c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, ))
+    def remove_archive(self, site, name):
+        conn, c = self.conn()
+        c.execute('DELETE FROM archive WHERE site=? AND name=?', [site, name])
+        #fixme, files could be still used by subarchive
+        #c.execute('DELETE FROM file WHERE path LIKE ?', ["%s%%"%path])
        conn.commit()

 #web
@ -544,7 +568,7 @@ class OxControl(Resource):
        self.putChild("media", File(self.db.get('media_cache', 'media')))

        #FIXME: this is just for debugging
-        if not 'Test' in self.db.archives():
+        if not 'Test' in self.db.archives('0xdb.org'):
            self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies')

    def putChild(self, name, child):
@ -557,16 +581,46 @@ class OxControl(Resource):
        return self

    def render_GET(self, request):
+        if request.path == '/add_archive':
+            args = {}
+            for arg in ('site', 'name', 'path'):
+                args[arg] = request.args.get(arg)[0]
+            self.db.add_archive(**arg)
+            response = {'status': 'ok'}
+            return json_response(request, response)
+
+        if request.path == '/remove_archive':
+            args = {}
+            for arg in ('site', 'name'):
+                args[arg] = request.args.get(arg)[0]
+            self.db.remove_archive(**arg)
+            response = {'status': 'ok'}
+            return json_response(request, response)
+
+        if request.path == '/archives':
+            args = {}
+            for arg in ['site']:
+                args[arg] = request.args.get(arg)[0]
+            response = {}
+            response['archives'] = self.db.archives(**args)
+            return json_response(request, response)
+
        if request.path == '/files':
            """
            /files
-             optional ?since=unixtimestamp
-                new/modified
-            files by archive
+                archive  archive name
+                site     site name
+                since (optional) timestamp, return changes since
+           files in archive
            """
-            since = request.args.get("since", None)
-            if since: since = float(since[0])
-            files = self.db.files(since)
+            args = {}
+            for arg in ['site', 'archive']:
+                args[arg] = request.args[arg][0]
+            since = request.args.get("since", [None])[0]
+            if since:
+                args['since'] = float(since)
+
+            files = self.db.files(**args)
            return json_response(request, files)

        if request.path == '/update':
@ -583,12 +637,12 @@ class OxControl(Resource):
            extract derivatives from videos
            """
            oshash = request.args.get("oshash", [None])[0]
-            media = request.args.get("media", [None, ])[0]
-            retry = request.args.get("retry", [None, ])[0]
+            media = request.args.get("media", [None])[0]
+            retry = request.args.get("retry", [None])[0]

            response = {'status': 'not enough data provided'}

-            f = self.db.get_file(oshash)
+            f = self.db.file(oshash)
            if not f:
                response = {'status': 'unkown oshash'}
            elif not 'duration' in f['info']:
@ -625,18 +679,19 @@ class OxControl(Resource):
        if request.path == '/get':
            """
            get information about a file, including derivatives
+            oshash - oshash of file
            """
            oshash = request.args.get("oshash", [None, ])[0]
            response = {'status': 'no oshash provided'}
            if oshash:
-                f = self.db.get_file(oshash)
+                f = self.db.file(oshash)
                response['status'] = 'available'
                response['info'] = f['info']
                files = [f['location'] for f in self.db.derivatives(oshash)]
                response['video'] = filter(lambda f: f.endswith('.webm'), files)
                response['stills'] = filter(lambda f: f.endswith('.png'), files)
-
            return json_response(request, response)
+
        return "<!DOCTYPE html><html>this is not for humans</html>"

 if __name__ == '__main__':