From 953d207377e97c73c4adf22d386a76f83737db3c Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 31 Jul 2010 15:13:37 +0200 Subject: [PATCH] cleaner site/archive/file seperation --- .bzrignore | 1 + TODO | 35 +++++++- media/test.js | 16 ++-- oxd.py | 241 +++++++++++++++++++++++++++++++------------------- 4 files changed, 189 insertions(+), 104 deletions(-) diff --git a/.bzrignore b/.bzrignore index 65426bc..79fdb85 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1 +1,2 @@ media/* +dev.sqlite diff --git a/TODO b/TODO index b25c0b4..4c3f71d 100644 --- a/TODO +++ b/TODO @@ -18,11 +18,38 @@ FIREFOX integration: media servers hosting the actuall videos and clients to access them - rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work) + +EXTENSION api: + oxff = OxFF() //is site is not allowed, ask if user wants to allow domaing to use oxff + + oxff.update() //check for new files, would be nicer if that would be somehow automatic and not needed + + oxff.archives() //return list of archive names + + //new archive + archive = oxff.archive('name') + archive.setLocation() //opens file dialog to set location of archive + + //get files + archive.files() + archive.files(since) //unixtimestamp, get new/deleted/... since + + //get info + oxff.get(oshash) //retuns info + //get media + oxff.get(oshash, 'stills') //retuns stills dict or None if not extracted + oxff.get(oshash, '96p.webm') //returns video location or none if not extracted + + //extract media + oxff.extract(oshash, 'stills') + oxff.extract(oshash, '96p.webm') + + //questions + - how to upload media to site? somehow make use of Firefogg + - could some of those requests take to long and require callbacks? + TODO: - add fields: - make archive / file link via id? - - is extracted field enough or requires frames/video thingy + use api like approach as done in pand.do/ra backend code cache location, should cache be inside of archive, home folder or whats a good default. 
must be a config option in diff --git a/media/test.js b/media/test.js index bee0597..5c2be1f 100644 --- a/media/test.js +++ b/media/test.js @@ -91,13 +91,15 @@ $(function(){ $m.append($files); return $m; } - backend.request('files', function(result) { - for_each_sorted(result, function(archive, movies) { - var $archive = $('
'); - $archive.html(archive); - $('#movies').append($archive); - for_each_sorted(movies, function(movie, files) { - $archive.append(addMovie(movie, files)); + backend.request('archives', {'site': '0xdb.org'}, function(result) { + $.each(result.archives, function(archive, path) { + backend.request('files', {'site': '0xdb.org', 'archive': archive}, function(result) { + var $archive = $('
'); + $archive.html(archive); + $('#movies').append($archive); + for_each_sorted(result, function(movie, files) { + $archive.append(addMovie(movie, files)); + }); }); }); }); diff --git a/oxd.py b/oxd.py index 8c26b80..b23bcda 100644 --- a/oxd.py +++ b/oxd.py @@ -173,7 +173,6 @@ def extract_still(video, target, position): shutil.rmtree(framedir) return r == 0 - def extract_video(video, target, profile, info): if not os.path.exists(target): fdir = os.path.dirname(target) @@ -228,8 +227,10 @@ def extract_video(video, target, profile, info): profile_cmd +=['-acodec', 'libvorbis'] aspect = dar.ratio + #use 1:1 pixel aspect ratio if dar is close to that if abs(width/height - dar) < 0.02: aspect = '%s:%s' % (width, height) + cmd = ['./ffmpeg', '-y', '-threads', '2', '-i', video ] + profile_cmd + [ @@ -262,7 +263,7 @@ class ExtractThread(Thread): self.db.extract.task_done() class Database(object): - def __init__(self, conn): + def __init__(self, db_conn): self.extract = Queue.Queue() for i in range(2): @@ -270,16 +271,15 @@ class Database(object): t.setDaemon(True) t.start() - self.db_conn = conn - conn = self.conn() - c = conn.cursor() + self.db_conn = db_conn + conn, c = self.conn() + c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''') if int(self.get('version', 0)) < 1: self.set('version', 1) db = [ '''CREATE TABLE IF NOT EXISTS file ( - archive varchar(1024), path varchar(1024) unique, folder varchar(1024), filename varchar(1024), @@ -292,16 +292,16 @@ class Database(object): created INT, modified INT, deleted INT)''', - '''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''', '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''', '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''', '''CREATE TABLE IF NOT EXISTS archive ( site varchar(1024), - name varchar(1024) unique, - path varchar(1024) unique, + name varchar(1024), + path varchar(1024), updated INT, created INT, - updating INT)''', + updating INT, + 
UNIQUE(site, name))'''
AND site=?', (archive, site)) + prefix = None + for row in c: + prefix = row[0] + if not prefix: + return {} def get_files(files, key, sql, t=()): + t = list(t) + [u"%s%%"%prefix] + c.execute(sql, t) for row in c: - archive = row[0] - folder = row[1] - filename = row[2] - info = json.loads(row[3]) - if not archive in files: files[archive]={} + folder = row[0] + filename = row[1] + info = json.loads(row[2]) if key: - if not key in files[archive]: files[archive][key]={} - if not folder in files[archive][key]: files[archive][key][folder]={} - files[archive][key][folder][filename] = info + if not key in files: files[key]={} + if not folder in files[key]: files[key][folder]={} + files[key][folder][filename] = info else: - if not folder in files[archive]: files[archive][folder]={} - files[archive][folder][filename] = info + if not folder in files: files[folder]={} + files[folder][filename] = info files = {} - sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE ' - sql_postfix = ' deleted < 0 ORDER BY path' + sql_prefix = 'SELECT folder, filename, info FROM file WHERE ' + sql_postfix = ' deleted < 0 AND path LIKE ? ORDER BY path' if since: get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, )) get_files(files, 'modified', @@ -389,9 +390,7 @@ class Database(object): #derivative def derivative(self, oshash, name, status=None): - conn = self.conn() - c = conn.cursor() - + conn, c = self.conn() d = {} d['oshash'] = oshash d['name'] = name @@ -417,8 +416,7 @@ class Database(object): return d def derivatives(self, oshash, status=STATUS_AVAILABLE): - conn = self.conn() - c = conn.cursor() + conn, c = self.conn() derivatives = [] sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?' 
c.execute(sql, (status, oshash)) @@ -427,7 +425,7 @@ class Database(object): return derivatives def extract_derivative(self, oshash, name): - f = self.get_file(oshash) + f = self.file(oshash) derivative = self.derivative(oshash, name) if derivative['status'] == STATUS_NEW: if name.endswith('.png'): @@ -450,16 +448,15 @@ class Database(object): self.derivative(oshash, name, STATUS_FAILED) #archive - def update(self, archive, path, folder, filename): - update = True + def update(self, path, folder, filename): + conn, c = self.conn() + update = True modified = time.mktime(time.localtime()) created = modified - sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?' - conn = self.conn() - c = conn.cursor() - c.execute(sql, (path, )) + sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE deleted < 0 AND path=?' + c.execute(sql, [path]) stat = os.stat(path) for row in c: if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]: @@ -472,59 +469,86 @@ class Database(object): info[key] = getattr(stat, 'st_'+key) oshash = info['oshash'] deleted = -1 - t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, + t = (path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, stat.st_size, json.dumps(info), created, modified, deleted) - c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) + c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) conn.commit() - def spider(self, archive): - path = self.archives()[archive] + def spider(self, path): path = os.path.normpath(path) + files = [] for dirpath, dirnames, filenames in os.walk(path): + if isinstance(dirpath, str): + dirpath = dirpath.decode('utf-8') if filenames: prefix = dirpath[len(path)+1:] for filename in filenames: + if isinstance(filename, str): + filename = filename.decode('utf-8') if not filename.startswith('._') 
and not filename in ('.DS_Store', ): - print dirpath, filename - self.update(archive, os.path.join(dirpath, filename), prefix, filename) + file_path = os.path.join(dirpath, filename) + files.append(file_path) + self.update(file_path, prefix, filename) + + conn, c = self.conn() + c.execute('SELECT path FROM file WHERE path LIKE ? AND deleted < 0', ["%s%%"%path]) + known_files = [r[0] for r in c.fetchall()] + deleted_files = filter(lambda f: f not in files, known_files) + ''' + print 'known' + print json.dumps(known_files, indent=2) + print 'spidered' + print json.dumps(files, indent=2) + ''' + print 'now delete' + print json.dumps(deleted_files, indent=2) + if deleted_files: + deleted = time.mktime(time.localtime()) + for f in deleted_files: + c.execute('UPDATE file SET deleted=? WHERE path=?', (deleted, f)) + conn.commit() def add_archive(self, site, name, path): + conn, c = self.conn() path = os.path.normpath(path) - conn = self.conn() - c = conn.cursor() created = time.mktime(time.localtime()) t = (site, name, path, created, created) + #FIXME: check if site/name exists or deal with error here c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t) conn.commit() - def archives(self): - conn = self.conn() - c = conn.cursor() - sql = 'SELECT name, path FROM archive ORDER BY name'; - c.execute(sql) + def archives(self, site): + conn, c = self.conn() + sql = 'SELECT name, path FROM archive WHERE site=? 
ORDER BY name'; + c.execute(sql, [site]) archives = {} for row in c: archives[row[0]] = row[1] return archives def update_archives(self): - conn = self.conn() - c = conn.cursor() - c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name'); - for row in c: - name = row[0] - c.execute(u'UPDATE archive set updating=1 where name=?', (name, )) + conn, c = self.conn() + c.execute('SELECT path FROM archive WHERE updating = 0 GROUP BY path ORDER BY path') + paths = [r[0] for r in c.fetchall()] + def not_subpath(path): + for p in paths: + if p != path and path.startswith(p): + return False + return True + paths = filter(not_subpath, paths) + for path in paths: + c.execute(u'UPDATE archive SET updating=1 WHERE path LIKE ?', ['%s%%'%path]) conn.commit() - self.spider(name) + self.spider(path) updated = time.mktime(time.localtime()) - c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name)) + c.execute(u'UPDATE archive SET updated=?, updating=0 WHERE path LIKE ?', (updated, '%s%%'%path)) conn.commit() - def remove_archive(self, name): - conn = self.conn() - c = conn.cursor() - c.execute('DELETE FROM archive WHERE path=?', (path, )) - c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, )) + def remove_archive(self, site, name): + conn, c = self.conn() + c.execute('DELETE FROM archive WHERE site=? 
AND name=?', [site, name]) + #fixme, files could be still used by subarchive + #c.execute('DELETE FROM file WHERE path LIKE ?', ["%s%%"%path]) conn.commit() #web @@ -544,7 +568,7 @@ class OxControl(Resource): self.putChild("media", File(self.db.get('media_cache', 'media'))) #FIXME: this is just for debugging - if not 'Test' in self.db.archives(): + if not 'Test' in self.db.archives('0xdb.org'): self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies') def putChild(self, name, child): @@ -557,16 +581,46 @@ class OxControl(Resource): return self def render_GET(self, request): + if request.path == '/add_archive': + args = {} + for arg in ('site', 'name', 'path'): + args[arg] = request.args.get(arg)[0] + self.db.add_archive(**arg) + response = {'status': 'ok'} + return json_response(request, response) + + if request.path == '/remove_archive': + args = {} + for arg in ('site', 'name'): + args[arg] = request.args.get(arg)[0] + self.db.remove_archive(**arg) + response = {'status': 'ok'} + return json_response(request, response) + + if request.path == '/archives': + args = {} + for arg in ['site']: + args[arg] = request.args.get(arg)[0] + response = {} + response['archives'] = self.db.archives(**args) + return json_response(request, response) + if request.path == '/files': """ /files - optional ?since=unixtimestamp - new/modified - files by archive + archive archive name + site site name + since (optional) timestamp, return changes since + files in archive """ - since = request.args.get("since", None) - if since: since = float(since[0]) - files = self.db.files(since) + args = {} + for arg in ['site', 'archive']: + args[arg] = request.args[arg][0] + since = request.args.get("since", [None])[0] + if since: + args['since'] = float(since) + + files = self.db.files(**args) return json_response(request, files) if request.path == '/update': @@ -583,12 +637,12 @@ class OxControl(Resource): extract derivatives from videos """ oshash = request.args.get("oshash", [None])[0] - 
media = request.args.get("media", [None, ])[0] - retry = request.args.get("retry", [None, ])[0] + media = request.args.get("media", [None])[0] + retry = request.args.get("retry", [None])[0] response = {'status': 'not enough data provided'} - f = self.db.get_file(oshash) + f = self.db.file(oshash) if not f: response = {'status': 'unkown oshash'} elif not 'duration' in f['info']: @@ -625,18 +679,19 @@ class OxControl(Resource): if request.path == '/get': """ get information about a file, including derivatives + oshash - oshash of file """ oshash = request.args.get("oshash", [None, ])[0] response = {'status': 'no oshash provided'} if oshash: - f = self.db.get_file(oshash) + f = self.db.file(oshash) response['status'] = 'available' response['info'] = f['info'] files = [f['location'] for f in self.db.derivatives(oshash)] response['video'] = filter(lambda f: f.endswith('.webm'), files) response['stills'] = filter(lambda f: f.endswith('.png'), files) - return json_response(request, response) + return "this is not for humans" if __name__ == '__main__':