# -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 # GPL 2010 from __future__ import division, with_statement """ on ubuntu/debian: apt-get install python-twisted else easy_install twisted twisted available on os x since 10.5 FRAME extraction: - vlc hangs on some files, mpg but also some avis - ffmpeg decodes full video, so it takes to long extracting frames at the end - oxframe only support ogv and webm (adding av* as option might work) - mplayer seams to work. might be an issue installing/bundling it FIREFOX integration: possible ways: - launch oxbackend on localhost and connect to it - way to add/configure backends + launch one localy adding folders in remote backends is a bit complicated beeing able to manage a backend remotely would be nice(security questions though) also makes it a bit more complicated, than again ideal for situations with media servers hosting the actuall videos and clients to access them - rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work) TODO: security, add auth framework, DIGEST might be good enough add fields: make archive / file link via id? is extracted field enough or requires frames/video thingy cache location, should cache be inside of archive, home folder or whats a good default. must be a config option in """ """ /files?since=timestamp { archive: { new updated deleted } } /extract?oshash=abc&media=stills /extract?oshash=abc&media=profile.webm { status: 'extracting|available|failed', } /get?oshash=abc { info: {} stills: [], video: [], } /get?oshash=abc { stills: [ "/media/../../12.png", "/media/../../123.png", "/media/../../321.png", ], video: [ "/media/../../96p.webm" ], info: {} } """ import fractions from glob import glob import json import os import re import Queue import sqlite3 import subprocess import sys import shutil import tempfile import time import thread from threading import Thread from twisted.cred.portal import IRealm, Portal from twisted.cred.checkers import InMemoryUsernamePasswordDatabaseDontUse from twisted.internet import task, reactor from twisted.web import server from twisted.web.guard import HTTPAuthSessionWrapper, DigestCredentialFactory from twisted.web.resource import Resource, IResource from twisted.web.static import File from zope.interface import implements STATUS_NEW=0 STATUS_EXTRACTING=1 STATUS_AVAILABLE=2 STATUS_FAILED=3 VIDEO_PROFILES = [ '720p', '480p', '360p', '96p', ] class AspectRatio(fractions.Fraction): def __new__(cls, numerator, denominator=None): if not denominator: ratio = map(int, numerator.split(':')) if len(ratio) == 1: ratio.append(1) numerator = ratio[0] denominator = ratio[1] #if its close enough to the common aspect ratios rather use that if abs(numerator/denominator - 4/3) < 0.03: numerator = 4 denominator = 3 elif abs(numerator/denominator - 16/9) < 0.02: numerator = 16 denominator = 9 return super(AspectRatio, cls).__new__(cls, numerator, denominator) @property def ratio(self): return "%d:%d" % (self.numerator, self.denominator) def avinfo(filename): if os.path.getsize(filename): p = subprocess.Popen(['ffmpeg2theora', '--info', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE) info, error = p.communicate() #remove metadata, can be broken reg = re.compile('"metadata": {.*?},', re.DOTALL) info = re.sub(reg, '', info) try: info = json.loads(info.decode('utf-8', 'replace')) except: print info, error if 'video' in info and info['video']: if not 'display_aspect_ratio' in info['video'][0]: dar = AspectRatio(info['video'][0]['width'], info['video'][0]['height']) info['video'][0]['display_aspect_ratio'] = dar.ratio del info['path'] if os.path.splitext(filename)[-1] in ('.srt', '.sub', '.idx', '.rar') and 'error' in info: del info['error'] if 'code' in info and info['code'] == 'badfile': del info['code'] return info return {'path': filename, 'size': 0} def hash_prefix(h): return [h[:2], h[2:4], h[4:6], h[6:]] def extract_all_stills(): db = Database('dev.sqlite') conn = db.conn() c = conn.cursor() sql = 'SELECT path, oshash, info FROM file' c.execute(sql) for row in c: video = row[0] oshash = row[1] info = json.loads(row[2]) if not 'Extras/' in video and 'video' in info and info['video']: prefix = os.path.join('media', os.path.join(*hash_prefix(oshash))) print video extract_stills(video, prefix, info) def run_command(cmd, timeout=25): p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) while timeout > 0: time.sleep(0.2) timeout -= 0.2 if p.poll() != None: return p.returncode if p.poll() == None: os.kill(p.pid, 9) killedpid, stat = os.waitpid(p.pid, os.WNOHANG) return p.returncode def extract_still(video, target, position): fdir = os.path.dirname(target) if fdir and not os.path.exists(fdir): os.makedirs(fdir) ''' #oxframe #this only works with theora and webm files!!! cmd = ['oxframe', '-p', str(position), '-i', video, '-o', frame] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) info, error = p.communicate() ''' ''' #ffmpeg #decodes all the wat to position, that takes to long cmd = ['ffmpeg', '-i', video, '-vframes', '1','-ss', str(position), '-f','image2', target] print cmd p = subprocess.Popen(cmd) p.wait() ''' ''' #VLC #hangs on mpg and some avi files with old divx3 video out = position + 0.2 framedir = tempfile.mkdtemp() vlc_path = 'vlc' for i in ("/Applications/VLC.app/Contents/MacOS/VLC", ): if os.path.exists(i): vlc_path = i cmd = [ vlc_path, '--vout=dummy', video, '--start-time=%s'%position, '--stop-time=%s'%out, '-I', 'dummy', '--video-filter=scene', '--scene-path=%s'%framedir, '--scene-format=png', '--scene-ratio=25', '--scene-prefix=still', '--swscale-mode=2', '--sout-transcode-vcodec=avcodec', '--noaudio', 'vlc://quit', ] #print cmd run_command(cmd) images = glob('%s/still*.png' % framedir) if images: shutil.move(images[0], target) shutil.rmtree(framedir) ''' #mplayer cwd = os.getcwd() target = os.path.abspath(target) framedir = tempfile.mkdtemp() os.chdir(framedir) cmd = ['mplayer', '-noautosub', video, '-ss', str(position), '-frames', '2', '-vo', 'png:z=9', '-ao', 'null'] r = run_command(cmd) images = glob('%s/*.png' % framedir) if images: shutil.move(images[-1], target) os.chdir(cwd) shutil.rmtree(framedir) return r == 0 def extract_video(video, target, profile, info): if not os.path.exists(target): fdir = os.path.dirname(target) if not os.path.exists(fdir): os.makedirs(fdir) dar = AspectRatio(info['video'][0]['display_aspect_ratio']) profile_cmd = [] ''' look into lag mb_static_threshold qmax/qmin rc_buf_aggressivity=0.95 token_partitions=4 level / speedlevel bt? ''' if profile == '720p': height = 720 width = int(dar * height) profile_cmd = ['-vb', '2M', '-g', '250'] if info['audio']: profile_cmd += ['-ar', '48000', '-aq', '5'] if profile == '480p': height = 480 width = int(dar * height) profile_cmd = ['-vb', '1400k', '-g', '250'] if info['audio']: profile_cmd += ['-ar', '44100', '-aq', '2'] if 'channels' in info['audio'][0] and info['audio'][0]['channels'] > 2: profile_cmd += ['-ac', '2'] elif profile == '360p': height = 360 width = int(dar * height) profile_cmd = ['-vb', '768k'] if info['audio']: profile_cmd += ['-ar', '44100', '-aq', '1'] if 'channels' in info['audio'][0] and info['audio'][0]['channels'] > 2: profile_cmd += ['-ac', '2'] else: height = 96 width = int(dar * height) profile_cmd = ['-vb', '96k', '-g', '50'] if info['audio']: profile_cmd += ['-ar', '22050', '-ac', '1', '-aq', '-1'] if 'channels' in info['audio'][0] and info['audio'][0]['channels'] > 1: profile_cmd += ['-ac', '1'] if info['audio']: profile_cmd +=['-acodec', 'libvorbis'] aspect = dar.ratio if abs(width/height - dar) < 0.02: aspect = '%s:%s' % (width, height) cmd = ['./ffmpeg', '-y', '-threads', '2', '-i', video ] + profile_cmd + [ '-s', '%dx%d'%(width, height), '-aspect', aspect, '-f','webm', target] print cmd #r = run_command(cmd, -1) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) p.wait() r = p.returncode print "done" return r == 0 def video_frame_positions(duration): pos = duration / 2 #return [pos/4, pos/2, pos/2+pos/4, pos, pos+pos/2, pos+pos/2+pos/4] return map(int, [pos/2, pos, pos+pos/2]) class ExtractThread(Thread): def __init__(self, db): Thread.__init__(self) self.db = db def run(self): while True: oshash, name = self.db.extract.get() self.db.extract_derivative(oshash, name) self.db.extract.task_done() class Database(object): def __init__(self, conn): self.extract = Queue.Queue() for i in range(2): t = ExtractThread(self) t.setDaemon(True) t.start() self.db_conn = conn conn = self.conn() c = conn.cursor() c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''') if int(self.get('version', 0)) < 1: self.set('version', 1) db = [ '''CREATE TABLE IF NOT EXISTS file ( archive varchar(1024), path varchar(1024) unique, folder varchar(1024), filename varchar(1024), oshash varchar(16), atime FLOAT, ctime FLOAT, mtime FLOAT, size INT, info TEXT, created INT, modified INT, deleted INT)''', '''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''', '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''', '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''', '''CREATE TABLE IF NOT EXISTS archive ( site varchar(1024), name varchar(1024) unique, path varchar(1024) unique, updated INT, created INT, updating INT)''', '''CREATE TABLE IF NOT EXISTS derivative ( oshash varchar(16), name varchar(1024), status INT, UNIQUE(oshash, name))''', ] for i in db: c.execute(i) c.execute('UPDATE archive set updating=0 WHERE 1=1') conn.commit() def conn(self): conn = sqlite3.connect(self.db_conn, timeout=10) conn.text_factory = str return conn def get(self, key, default=None): conn = self.conn() c = conn.cursor() c.execute('SELECT value FROM setting WHERE key = ?', (key, )) for row in c: return row[0] return default def set(self, key, value): conn = self.conn() c = conn.cursor() c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value))) conn.commit() def remove(self, path): sql = 'DELETE FROM file WHERE path=?' conn = self.conn() c = conn.cursor() c.execute(sql, (path, )) #files def get_file(self, oshash): conn = self.conn() c = conn.cursor() f = {} sql = 'SELECT path, archive, folder, filename, info FROM file WHERE oshash=?' c.execute(sql, (oshash, )) for row in c: f['path'] = row[0] f['archive'] = row[1] f['folder'] = row[2] f['filename'] = row[3] f['info'] = json.loads(row[4]) break return f def files(self, since=None): conn = self.conn() c = conn.cursor() def get_files(files, key, sql, t=()): c.execute(sql, t) for row in c: archive = row[0] folder = row[1] filename = row[2] info = json.loads(row[3]) if not archive in files: files[archive]={} if key: if not key in files[archive]: files[archive][key]={} if not folder in files[archive][key]: files[archive][key][folder]={} files[archive][key][folder][filename] = info else: if not folder in files[archive]: files[archive][folder]={} files[archive][folder][filename] = info files = {} sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE ' sql_postfix = ' deleted < 0 ORDER BY path' if since: get_files(files, 'deleted', sql_prefix + 'deleted >= ? ORDER BY path' , (since, )) get_files(files, 'modified', sql_prefix + 'created < ? AND modified >= ? AND'+sql_postfix, (since, since)) get_files(files, 'new', sql_prefix + 'created >= ? AND'+sql_postfix, (since, )) else: get_files(files, None, sql_prefix + sql_postfix) return files #derivative def derivative(self, oshash, name, status=None): conn = self.conn() c = conn.cursor() d = {} d['oshash'] = oshash d['name'] = name d['status'] = status if status == None: sql = 'SELECT status FROM derivative WHERE oshash=? AND name=?' c.execute(sql, (oshash, name)) for row in c: d['status'] = row[0] if d['status'] == None: #this is a new derivative, add to db and add to enc queue return self.derivative(oshash, name, STATUS_NEW) else: print "insert or update derivative", oshash, name, status c.execute(u'INSERT OR REPLACE INTO derivative values (?, ?, ?)', (oshash, name, status)) conn.commit() prefix = hash_prefix(oshash) path_prefix = os.path.join(self.get('media_cache', 'media'), *prefix) d['path'] = os.path.join(path_prefix, name) d['location'] = '/'.join(['/media', ] + prefix + [name, ]) return d def derivatives(self, oshash, status=STATUS_AVAILABLE): conn = self.conn() c = conn.cursor() derivatives = [] sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?' c.execute(sql, (status, oshash)) for row in c: derivatives.append(self.derivative(oshash, row[0])) return derivatives def extract_derivative(self, oshash, name): f = self.get_file(oshash) derivative = self.derivative(oshash, name) if derivative['status'] == STATUS_NEW: if name.endswith('.png'): for pos in video_frame_positions(f['info']['duration']): still_name = '%s.png' % pos still_d = self.derivative(oshash, still_name) if still_d['status'] == STATUS_NEW: self.derivative(oshash, still_name, STATUS_EXTRACTING) if extract_still(f['path'], still_d['path'], pos): self.derivative(oshash, still_name, STATUS_AVAILABLE) else: self.derivative(oshash, still_name, STATUS_FAILED) elif name.endswith('.webm'): profile = name[:-5] print 'now lets go, are we having fun?' self.derivative(oshash, name, STATUS_EXTRACTING) if extract_video(f['path'], derivative['path'], profile, f['info']): self.derivative(oshash, name, STATUS_AVAILABLE) else: self.derivative(oshash, name, STATUS_FAILED) #archive def update(self, archive, path, folder, filename): update = True modified = time.mktime(time.localtime()) created = modified sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?' conn = self.conn() c = conn.cursor() c.execute(sql, (path, )) stat = os.stat(path) for row in c: if stat.st_atime == row[0] and stat.st_ctime == row[1] and stat.st_mtime == row[2] and stat.st_size == row[3]: created = row[4] update = False break if update: info = avinfo(path) for key in ('atime', 'ctime', 'mtime'): info[key] = getattr(stat, 'st_'+key) oshash = info['oshash'] deleted = -1 t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime, stat.st_size, json.dumps(info), created, modified, deleted) c.execute(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', t) conn.commit() def spider(self, archive): path = self.archives()[archive] path = os.path.normpath(path) for dirpath, dirnames, filenames in os.walk(path): if filenames: prefix = dirpath[len(path)+1:] for filename in filenames: if not filename.startswith('._') and not filename in ('.DS_Store', ): print dirpath, filename self.update(archive, os.path.join(dirpath, filename), prefix, filename) def add_archive(self, site, name, path): path = os.path.normpath(path) conn = self.conn() c = conn.cursor() created = time.mktime(time.localtime()) t = (site, name, path, created, created) c.execute(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t) conn.commit() def archives(self): conn = self.conn() c = conn.cursor() sql = 'SELECT name, path FROM archive ORDER BY name'; c.execute(sql) archives = {} for row in c: archives[row[0]] = row[1] return archives def update_archives(self): conn = self.conn() c = conn.cursor() c.execute('SELECT name FROM archive WHERE updating = 0 ORDER BY name'); for row in c: name = row[0] c.execute(u'UPDATE archive set updating=1 where name=?', (name, )) conn.commit() self.spider(name) updated = time.mktime(time.localtime()) c.execute(u'UPDATE archive set updated=?, updating=0 where name=?', (updated, name)) conn.commit() def remove_archive(self, name): conn = self.conn() c = conn.cursor() c.execute('DELETE FROM archive WHERE path=?', (path, )) c.execute('DELETE FROM file WHERE path LIKE(?%)', (path, )) conn.commit() #web def json_response(request, data): request.headers['Content-Type'] = 'text/javascript' return json.dumps(data, indent=2) class OxControl(Resource): _children = [] #isLeaf = True def __init__(self, db_path): self.db_path = db_path Resource.__init__(self) self.db = Database(self.db_path) self.putChild("media", File(self.db.get('media_cache', 'media'))) #FIXME: this is just for debugging if not 'Test' in self.db.archives(): self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies') def putChild(self, name, child): self._children.append(name) return Resource.putChild(self, name, child) def getChild(self, name, request): if name in self._children: return Resource.getChild(self, name, request) return self def render_GET(self, request): if request.path == '/files': """ /files optional ?since=unixtimestamp new/modified files by archive """ since = request.args.get("since", None) if since: since = float(since[0]) files = self.db.files(since) return json_response(request, files) if request.path == '/update': """ checks for new files in all known archives """ #update in another thread, this otherwise blocks web server thread.start_new_thread(self.db.update_archives,()) response = {'status': 'ok'} return json_response(request, response) if request.path == '/extract': """ extract derivatives from videos """ oshash = request.args.get("oshash", [None])[0] media = request.args.get("media", [None, ])[0] retry = request.args.get("retry", [None, ])[0] response = {'status': 'not enough data provided'} f = self.db.get_file(oshash) if not f: response = {'status': 'unkown oshash'} elif not 'duration' in f['info']: response = {'status': 'unkown format, can not extract data'} else: if media == 'stills': name = '%s.png'%video_frame_positions(f['info']['duration'])[0] elif media.endswith('.webm'): profile = media[:-5] if profile in VIDEO_PROFILES: name = media else: response = {'status': 'unsupported video profile requested'} if name: #get or create derivative derivative = self.db.derivative(oshash, name) if derivative['status'] == STATUS_FAILED and retry: derivative = self.db.derivative(oshash, name, STATUS_NEW) response['status'] = { STATUS_NEW: 'extracting', STATUS_EXTRACTING: 'extracting', STATUS_AVAILABLE: 'available', STATUS_FAILED: 'failed', }.get(derivative['status'], 'extracting') if derivative['status'] == STATUS_NEW: self.db.extract.put((oshash, name)) files = [f['location'] for f in self.db.derivatives(oshash)] if media == 'stills': response['stills'] = filter(lambda f: f.endswith('.png'), files) else: response['video'] = filter(lambda f: f.endswith('.webm'), files) return json_response(request, response) if request.path == '/get': """ get information about a file, including derivatives """ oshash = request.args.get("oshash", [None, ])[0] response = {'status': 'no oshash provided'} if oshash: f = self.db.get_file(oshash) response['status'] = 'available' response['info'] = f['info'] files = [f['location'] for f in self.db.derivatives(oshash)] response['video'] = filter(lambda f: f.endswith('.webm'), files) response['stills'] = filter(lambda f: f.endswith('.png'), files) return json_response(request, response) return "this is not for humans" if __name__ == '__main__': db = 'dev.sqlite' port = 2620 username = 'fix' password = 'me' interface = '127.0.0.1' interface = '10.26.20.10' interface = '0.0.0.0' print 'http://%s:%d/' % (interface, port) root = OxControl(db) checker = InMemoryUsernamePasswordDatabaseDontUse() checker.addUser(username, password) class PublicHTMLRealm(object): implements(IRealm) def requestAvatar(self, avatarId, mind, *interfaces): if IResource in interfaces: return (IResource, root, lambda: None) raise NotImplementedError() portal = Portal(PublicHTMLRealm(), [checker]) credentialFactory = DigestCredentialFactory("md5", "oxbackend") resource = HTTPAuthSessionWrapper(portal, [credentialFactory]) site = server.Site(resource) reactor.listenTCP(port, site, interface=interface) reactor.run()