oxd/oxd.py
2010-07-30 15:24:50 +02:00

743 lines
25 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2010
from __future__ import division, with_statement
"""
on ubuntu/debian:
apt-get install python-twisted
else
easy_install twisted
twisted available on os x since 10.5
FRAME extraction:
- vlc hangs on some files, mpg but also some avis
- ffmpeg decodes the full video, so it takes too long extracting frames at the end
- oxframe only supports ogv and webm (adding av* as option might work)
- mplayer seems to work. might be an issue installing/bundling it
FIREFOX integration:
possible ways:
- launch oxbackend on localhost and connect to it
- way to add/configure backends + launch one locally
adding folders in remote backends is a bit complicated
being able to manage a backend remotely would be nice (security questions though)
also makes it a bit more complicated, then again ideal for situations with
media servers hosting the actual videos and clients to access them
- rewrite large parts in javascript + sqlite bundled with firefox (requires js subprocess to work)
TODO:
security, add auth framework, DIGEST might be good enough
add fields:
make archive / file link via id?
is extracted field enough or requires frames/video thingy
cache location, should cache be inside of archive, home folder or whats a good default.
must be a config option in
"""
"""
/files?since=timestamp
{
archive: {
new
updated
deleted
}
}
/extract?oshash=abc&media=stills
/extract?oshash=abc&media=profile.webm
{
status: 'extracting|available|failed',
}
/get?oshash=abc
{
info: {}
stills: [],
video: [],
}
/get?oshash=abc
{
stills: [
"/media/../../12.png",
"/media/../../123.png",
"/media/../../321.png",
],
video: [
"/media/../../96p.webm"
],
info: {}
}
"""
import fractions
from glob import glob
import json
import os
import re
import Queue
import sqlite3
import subprocess
import sys
import shutil
import tempfile
import time
import thread
from threading import Thread
from twisted.cred.portal import IRealm, Portal
from twisted.cred.checkers import InMemoryUsernamePasswordDatabaseDontUse
from twisted.internet import task, reactor
from twisted.web import server
from twisted.web.guard import HTTPAuthSessionWrapper, DigestCredentialFactory
from twisted.web.resource import Resource, IResource
from twisted.web.static import File
from zope.interface import implements
# Values stored in the `status` column of the `derivative` table.
# The /extract handler reports both NEW and EXTRACTING as 'extracting'.
STATUS_NEW=0
STATUS_EXTRACTING=1
STATUS_AVAILABLE=2
STATUS_FAILED=3
# Video profiles accepted by /extract as "<profile>.webm".
VIDEO_PROFILES = [
'720p',
'480p',
'360p',
'96p',
]
class AspectRatio(fractions.Fraction):
    """Fraction describing a display aspect ratio.

    Can be built either from a "W:H" string (a bare "W" means "W:1") or
    from a numerator/denominator pair such as pixel width and height.
    Ratios close to 4:3 or 16:9 are snapped to exactly those values.
    """

    def __new__(cls, numerator, denominator=None):
        """Create the ratio.

        numerator   -- "W:H" / "W" string when denominator is omitted,
                       otherwise the integer numerator
        denominator -- integer denominator, or None for the string form

        Raises ZeroDivisionError for a zero denominator and ValueError
        for a string that does not contain integers.
        """
        # Only a missing denominator selects the string form; an explicit
        # 0 must not be mistaken for "not given".
        if denominator is None:
            # A real list (not map()) so len()/append() work everywhere.
            ratio = [int(part) for part in numerator.split(':')]
            if len(ratio) == 1:
                ratio.append(1)
            numerator = ratio[0]
            denominator = ratio[1]
        # if it is close enough to the common aspect ratios rather use that
        # (the module enables true division via __future__)
        value = numerator / denominator
        if abs(value - 4 / 3) < 0.03:
            numerator = 4
            denominator = 3
        elif abs(value - 16 / 9) < 0.02:
            numerator = 16
            denominator = 9
        return super(AspectRatio, cls).__new__(cls, numerator, denominator)

    @property
    def ratio(self):
        """The reduced ratio formatted as "numerator:denominator"."""
        return "%d:%d" % (self.numerator, self.denominator)
def avinfo(filename):
    """Return media information for filename as a dict.

    Runs `ffmpeg2theora --info` and parses its JSON output. Empty files
    are not probed and yield {'path': filename, 'size': 0}. If the tool's
    output cannot be parsed, a dict with 'path', 'size' and 'error' is
    returned instead of raising.
    """
    size = os.path.getsize(filename)
    if not size:
        return {'path': filename, 'size': 0}
    p = subprocess.Popen(['ffmpeg2theora', '--info', filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw, error = p.communicate()
    # Decode once, then work on text only.
    text = raw.decode('utf-8', 'replace')
    #remove metadata, can be broken
    reg = re.compile('"metadata": {.*?},', re.DOTALL)
    text = re.sub(reg, '', text)
    try:
        info = json.loads(text)
    except ValueError:
        # Previously the unparsed string was used as if it were a dict,
        # which crashed a few lines later; report the failure instead.
        print('%s %s' % (raw, error))
        return {
            'path': filename,
            'size': size,
            'error': 'unable to parse ffmpeg2theora output',
        }
    if info.get('video'):
        stream = info['video'][0]
        # Derive the aspect ratio from the frame size when the tool did
        # not report one; skipped if width/height are missing or zero.
        if ('display_aspect_ratio' not in stream
                and stream.get('width') and stream.get('height')):
            dar = AspectRatio(stream['width'], stream['height'])
            stream['display_aspect_ratio'] = dar.ratio
    info.pop('path', None)
    # Errors reported for these non-media file types are dropped.
    if os.path.splitext(filename)[-1] in ('.srt', '.sub', '.idx', '.rar') and 'error' in info:
        del info['error']
        if info.get('code') == 'badfile':
            del info['code']
    return info
def hash_prefix(h):
    """Split a hash into three 2-character pieces plus the remainder.

    The four parts are used as nested directory names for cached media.
    """
    cuts = (0, 2, 4, 6)
    parts = [h[start:stop] for start, stop in zip(cuts, cuts[1:])]
    parts.append(h[6:])
    return parts
def extract_all_stills():
    """Extract stills for every video listed in dev.sqlite.

    Files whose path contains 'Extras/' and files without video streams
    or without a known duration are skipped. Stills are written to
    media/<hash prefix>/<position>.png; derivative status rows are not
    touched by this helper.
    """
    db = Database('dev.sqlite')
    conn = db.conn()
    try:
        rows = conn.execute('SELECT path, oshash, info FROM file').fetchall()
    finally:
        conn.close()
    for video, oshash, raw_info in rows:
        info = json.loads(raw_info)
        if 'Extras/' in video or not info.get('video'):
            continue
        if 'duration' not in info:
            # frame positions are derived from the duration
            continue
        prefix = os.path.join('media', os.path.join(*hash_prefix(oshash)))
        print(video)
        # The original called an undefined extract_stills(); extract one
        # still per position with extract_still() instead.
        for position in video_frame_positions(info['duration']):
            target = os.path.join(prefix, '%s.png' % position)
            extract_still(video, target, position)
def run_command(cmd, timeout=25):
    """Run cmd and return its exit code, killing it after timeout seconds.

    cmd     -- argument list passed to subprocess.Popen
    timeout -- seconds to wait before the process is killed

    The child's stdout/stderr are discarded. When the process had to be
    killed, the returned code is whatever the platform reports for a
    killed process (non-zero), so callers testing `== 0` see a failure.
    """
    # Output is never read, so send it to the null device; unread pipes
    # fill up and block the child until the timeout hits.
    devnull = open(os.devnull, 'wb')
    p = None
    try:
        p = subprocess.Popen(cmd, stdout=devnull, stderr=devnull,
                             stdin=subprocess.PIPE)
        # Wall-clock deadline instead of subtracting sleep intervals.
        deadline = time.time() + timeout
        while p.poll() is None and time.time() < deadline:
            time.sleep(0.1)
        if p.poll() is None:
            p.kill()
            # reap the child so it does not linger as a zombie
            p.wait()
        return p.returncode
    finally:
        if p is not None and p.stdin:
            p.stdin.close()
        devnull.close()
def extract_still(video, target, position):
    """Extract one frame of video at position (seconds) into target.

    Returns True only if mplayer exited with 0 and a frame was actually
    written to target.

    Alternatives that were tried and dropped:
    - oxframe: only works with theora and webm files
    - ffmpeg: decodes all the way to the position, which takes too long
    - vlc: hangs on mpg and on some avi files with old divx3 video
    """
    fdir = os.path.dirname(target)
    if fdir and not os.path.exists(fdir):
        try:
            os.makedirs(fdir)
        except OSError:
            # another worker may have created it in the meantime
            if not os.path.isdir(fdir):
                raise
    #mplayer
    # mplayer writes its png frames into the current directory, so both
    # paths must be absolute before the working directory changes.
    video = os.path.abspath(video)
    target = os.path.abspath(target)
    cwd = os.getcwd()
    framedir = tempfile.mkdtemp()
    extracted = False
    try:
        # NOTE(review): chdir affects the whole process, including other threads.
        os.chdir(framedir)
        cmd = ['mplayer', '-noautosub', video, '-ss', str(position), '-frames', '2', '-vo', 'png:z=9', '-ao', 'null']
        r = run_command(cmd)
        # sorted so the last entry really is the last frame written
        images = sorted(glob('%s/*.png' % framedir))
        if images:
            shutil.move(images[-1], target)
            extracted = True
    finally:
        # always restore the working directory and drop the temp dir
        os.chdir(cwd)
        shutil.rmtree(framedir)
    return r == 0 and extracted
def _video_profile_options(profile, audio):
    """Return (height, ffmpeg options) for a profile name.

    audio is the list of audio streams (may be empty or None). Unknown
    profile names fall back to the 96p settings.
    """
    def channels_over(limit):
        return 'channels' in audio[0] and audio[0]['channels'] > limit

    if profile == '720p':
        height = 720
        options = ['-vb', '2M', '-g', '250']
        if audio:
            options += ['-ar', '48000', '-aq', '5']
    elif profile == '480p':
        height = 480
        options = ['-vb', '1400k', '-g', '250']
        if audio:
            options += ['-ar', '44100', '-aq', '2']
            if channels_over(2):
                options += ['-ac', '2']
    elif profile == '360p':
        height = 360
        options = ['-vb', '768k']
        if audio:
            options += ['-ar', '44100', '-aq', '1']
            if channels_over(2):
                options += ['-ac', '2']
    else:
        height = 96
        options = ['-vb', '96k', '-g', '50']
        if audio:
            # always mono, so no separate channel check is needed
            options += ['-ar', '22050', '-ac', '1', '-aq', '-1']
    if audio:
        options += ['-acodec', 'libvorbis']
    return height, options


def extract_video(video, target, profile, info):
    """Encode video to a webm file at target using the given profile.

    video   -- path of the source file
    target  -- path of the webm file to create
    profile -- '720p', '480p', '360p' or '96p'
    info    -- file info dict; info['video'][0]['display_aspect_ratio']
               is required, info['audio'] is optional

    Returns True if target already exists or was encoded successfully,
    False otherwise. The encoder writes to a temporary file next to
    target, so target only ever exists as a complete file.

    ffmpeg options still to look into: lag, mb_static_threshold,
    qmax/qmin, rc_buf_aggressivity=0.95, token_partitions=4,
    level / speedlevel, bt
    """
    if os.path.exists(target):
        return True
    fdir = os.path.dirname(target)
    if fdir and not os.path.exists(fdir):
        try:
            os.makedirs(fdir)
        except OSError:
            # another worker may have created it in the meantime
            if not os.path.isdir(fdir):
                raise
    dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
    # The profile branches are a single if/elif chain now; previously
    # '720p' fell through to the 96p settings.
    height, profile_cmd = _video_profile_options(profile, info.get('audio'))
    width = int(dar * height)
    aspect = dar.ratio
    if abs(width/height - dar) < 0.02:
        aspect = '%s:%s' % (width, height)
    partial = target + '.part'
    cmd = ['./ffmpeg', '-y', '-threads', '2',
           '-i', video
    ] + profile_cmd + [
           '-s', '%dx%d'%(width, height),
           '-aspect', aspect,
           '-f','webm',
           partial]
    print(cmd)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    p.wait()
    success = p.returncode == 0 and os.path.exists(partial)
    if success:
        os.rename(partial, target)
    elif os.path.exists(partial):
        # do not leave a broken file behind
        os.remove(partial)
    print("done")
    return success
def video_frame_positions(duration):
    """Return three still positions (whole seconds) for a clip.

    The positions sit at a quarter, half and three quarters of duration.
    """
    middle = duration / 2
    #return [pos/4, pos/2, pos/2+pos/4, pos, pos+pos/2, pos+pos/2+pos/4]
    candidates = (middle / 2, middle, middle + middle / 2)
    return [int(seconds) for seconds in candidates]
class ExtractThread(Thread):
    """Worker thread that processes jobs from a database's extract queue.

    db must provide an `extract` queue of (oshash, name) tuples and an
    `extract_derivative(oshash, name)` method.
    """

    def __init__(self, db):
        Thread.__init__(self)
        self.db = db

    def run(self):
        """Process queued jobs forever; a failing job never stops the worker."""
        import traceback
        while True:
            oshash, name = self.db.extract.get()
            try:
                self.db.extract_derivative(oshash, name)
            except Exception:
                # Log and continue: an uncaught exception would end this
                # thread and leave the queue without a consumer.
                traceback.print_exc()
            finally:
                self.db.extract.task_done()
class Database(object):
    """SQLite-backed index of archives, their files and media derivatives.

    Every operation opens its own connection (see conn()), so methods can
    be called from the web thread and from the extract workers alike.
    `extract` is the queue of (oshash, name) jobs the workers consume.
    """

    # All statements are idempotent and therefore run on every start.
    _schema = (
        '''CREATE TABLE IF NOT EXISTS file (
            archive varchar(1024),
            path varchar(1024) unique,
            folder varchar(1024),
            filename varchar(1024),
            oshash varchar(16),
            atime FLOAT,
            ctime FLOAT,
            mtime FLOAT,
            size INT,
            info TEXT,
            created INT,
            modified INT,
            deleted INT)''',
        '''CREATE INDEX IF NOT EXISTS archive_idx ON file (archive)''',
        '''CREATE INDEX IF NOT EXISTS path_idx ON file (path)''',
        '''CREATE INDEX IF NOT EXISTS oshash_idx ON file (oshash)''',
        '''CREATE TABLE IF NOT EXISTS archive (
            site varchar(1024),
            name varchar(1024) unique,
            path varchar(1024) unique,
            updated INT,
            created INT,
            updating INT)''',
        '''CREATE TABLE IF NOT EXISTS derivative (
            oshash varchar(16),
            name varchar(1024),
            status INT,
            UNIQUE(oshash, name))''',
    )

    def __init__(self, conn):
        """Open (and if needed create) the database file at path conn.

        Also clears stale `updating` flags and starts two daemon
        ExtractThread workers.
        """
        self.extract = Queue.Queue()
        self.db_conn = conn
        conn = self.conn()
        try:
            c = conn.cursor()
            c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''')
            if int(self.get('version', 0)) < 1:
                self.set('version', 1)
            for statement in self._schema:
                c.execute(statement)
            # nothing can be updating right after start
            c.execute('UPDATE archive set updating=0 WHERE 1=1')
            conn.commit()
        finally:
            conn.close()
        # start the workers only once the schema exists
        for i in range(2):
            t = ExtractThread(self)
            t.setDaemon(True)
            t.start()

    def conn(self):
        """Return a new sqlite3 connection; the caller must close it."""
        conn = sqlite3.connect(self.db_conn, timeout=10)
        conn.text_factory = str
        return conn

    def _query(self, sql, params=(), commit=False):
        """Run one statement on a fresh connection and return all rows.

        The connection is always closed; with commit=True the change is
        committed first.
        """
        conn = self.conn()
        try:
            rows = conn.execute(sql, params).fetchall()
            if commit:
                conn.commit()
            return rows
        finally:
            conn.close()

    def get(self, key, default=None):
        """Return the stored setting for key, or default if it is not set."""
        rows = self._query('SELECT value FROM setting WHERE key = ?', (key, ))
        if rows:
            return rows[0][0]
        return default

    def set(self, key, value):
        """Store str(value) as the setting for key."""
        self._query(u'INSERT OR REPLACE INTO setting values (?, ?)',
                    (key, str(value)), commit=True)

    def remove(self, path):
        """Delete the file row for path."""
        # commit was missing before, so the delete never became permanent
        self._query('DELETE FROM file WHERE path=?', (path, ), commit=True)

    #files
    def get_file(self, oshash):
        """Return path/archive/folder/filename/info for oshash, or {}."""
        sql = 'SELECT path, archive, folder, filename, info FROM file WHERE oshash=?'
        f = {}
        for row in self._query(sql, (oshash, )):
            f['path'] = row[0]
            f['archive'] = row[1]
            f['folder'] = row[2]
            f['filename'] = row[3]
            f['info'] = json.loads(row[4])
            break
        return f

    def files(self, since=None):
        """Return file info nested as archive -> folder -> filename.

        With a since timestamp the result is nested as
        archive -> 'deleted'|'modified'|'new' -> folder -> filename and
        only contains entries changed at or after that time.
        """
        files = {}

        def collect(key, sql, params=()):
            for archive, folder, filename, info in self._query(sql, params):
                target = files.setdefault(archive, {})
                if key:
                    target = target.setdefault(key, {})
                target.setdefault(folder, {})[filename] = json.loads(info)

        sql_prefix = 'SELECT archive, folder, filename, info FROM file WHERE '
        sql_postfix = ' deleted < 0 ORDER BY path'
        if since:
            collect('deleted', sql_prefix + 'deleted >= ? ORDER BY path', (since, ))
            collect('modified',
                    sql_prefix + 'created < ? AND modified >= ? AND' + sql_postfix,
                    (since, since))
            collect('new', sql_prefix + 'created >= ? AND' + sql_postfix, (since, ))
        else:
            collect(None, sql_prefix + sql_postfix)
        return files

    #derivative
    def derivative(self, oshash, name, status=None):
        """Get, create or update the derivative name of file oshash.

        Without status the stored status is looked up; an unknown
        derivative is created with STATUS_NEW. With status the row is
        inserted or replaced. Returns a dict with oshash, name, status,
        path (location in the media cache) and location (URL path).
        """
        if status is None:
            sql = 'SELECT status FROM derivative WHERE oshash=? AND name=?'
            for row in self._query(sql, (oshash, name)):
                status = row[0]
            if status is None:
                #this is a new derivative, add to db and add to enc queue
                return self.derivative(oshash, name, STATUS_NEW)
        else:
            print('insert or update derivative %s %s %s' % (oshash, name, status))
            self._query(u'INSERT OR REPLACE INTO derivative values (?, ?, ?)',
                        (oshash, name, status), commit=True)
        prefix = hash_prefix(oshash)
        path_prefix = os.path.join(self.get('media_cache', 'media'), *prefix)
        return {
            'oshash': oshash,
            'name': name,
            'status': status,
            'path': os.path.join(path_prefix, name),
            'location': '/'.join(['/media', ] + prefix + [name, ]),
        }

    def derivatives(self, oshash, status=STATUS_AVAILABLE):
        """Return the derivative dicts of oshash that have the given status."""
        sql = 'SELECT name FROM derivative WHERE status=? AND oshash=?'
        rows = self._query(sql, (status, oshash))
        return [self.derivative(oshash, row[0]) for row in rows]

    def extract_derivative(self, oshash, name):
        """Create derivative name for oshash if its status is STATUS_NEW.

        A '*.png' name extracts every still position of the file; a
        '<profile>.webm' name encodes the video. The status is moved to
        EXTRACTING and then to AVAILABLE or FAILED.
        """
        f = self.get_file(oshash)
        derivative = self.derivative(oshash, name)
        if derivative['status'] != STATUS_NEW:
            return
        if not f:
            # the file vanished from the index; nothing can be extracted
            self.derivative(oshash, name, STATUS_FAILED)
            return
        if name.endswith('.png'):
            for pos in video_frame_positions(f['info']['duration']):
                still_name = '%s.png' % pos
                still_d = self.derivative(oshash, still_name)
                if still_d['status'] == STATUS_NEW:
                    self.derivative(oshash, still_name, STATUS_EXTRACTING)
                    if extract_still(f['path'], still_d['path'], pos):
                        self.derivative(oshash, still_name, STATUS_AVAILABLE)
                    else:
                        self.derivative(oshash, still_name, STATUS_FAILED)
        elif name.endswith('.webm'):
            profile = name[:-5]
            print('now lets go, are we having fun?')
            self.derivative(oshash, name, STATUS_EXTRACTING)
            if extract_video(f['path'], derivative['path'], profile, f['info']):
                self.derivative(oshash, name, STATUS_AVAILABLE)
            else:
                self.derivative(oshash, name, STATUS_FAILED)

    #archive
    def update(self, archive, path, folder, filename):
        """Add or refresh the row for path if the file changed on disk.

        A file counts as unchanged when atime, ctime, mtime and size all
        match the stored values.
        """
        modified = time.mktime(time.localtime())
        created = modified
        stat = os.stat(path)
        sql = 'SELECT atime, ctime, mtime, size, created FROM file WHERE path=?'
        for row in self._query(sql, (path, )):
            # Keep the original creation time for known files; otherwise
            # a changed file could never be listed as 'modified'.
            created = row[4]
            if (stat.st_atime == row[0] and stat.st_ctime == row[1]
                    and stat.st_mtime == row[2] and stat.st_size == row[3]):
                return
            break
        info = avinfo(path)
        for key in ('atime', 'ctime', 'mtime'):
            info[key] = getattr(stat, 'st_'+key)
        # avinfo() returns no oshash for empty files
        oshash = info.get('oshash')
        deleted = -1
        t = (archive, path, folder, filename, oshash, stat.st_atime, stat.st_ctime, stat.st_mtime,
             stat.st_size, json.dumps(info), created, modified, deleted)
        self._query(u'INSERT OR REPLACE INTO file values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    t, commit=True)

    def spider(self, archive):
        """Walk the directory of the named archive and update() every file.

        Files starting with '._' and '.DS_Store' are ignored.
        """
        path = self.archives()[archive]
        path = os.path.normpath(path)
        for dirpath, dirnames, filenames in os.walk(path):
            # folder relative to the archive root
            prefix = dirpath[len(path)+1:]
            for filename in filenames:
                if filename.startswith('._') or filename in ('.DS_Store', ):
                    continue
                print('%s %s' % (dirpath, filename))
                self.update(archive, os.path.join(dirpath, filename), prefix, filename)

    def add_archive(self, site, name, path):
        """Register archive name for site with its root directory path."""
        path = os.path.normpath(path)
        created = time.mktime(time.localtime())
        t = (site, name, path, created, created)
        self._query(u'INSERT INTO archive values (?, ?, ?, ?, ?, 0)', t, commit=True)

    def archives(self):
        """Return a dict mapping archive names to their paths."""
        rows = self._query('SELECT name, path FROM archive ORDER BY name')
        return dict((row[0], row[1]) for row in rows)

    def update_archives(self):
        """Spider every archive that is not currently being updated."""
        # Read all names first: the old code re-used the cursor it was
        # iterating, which discarded the remaining rows.
        rows = self._query('SELECT name FROM archive WHERE updating = 0 ORDER BY name')
        for name in [row[0] for row in rows]:
            self._query(u'UPDATE archive set updating=1 where name=?', (name, ), commit=True)
            try:
                self.spider(name)
            except Exception:
                # do not leave the archive flagged as updating
                self._query(u'UPDATE archive set updating=0 where name=?', (name, ), commit=True)
                raise
            updated = time.mktime(time.localtime())
            self._query(u'UPDATE archive set updated=?, updating=0 where name=?',
                        (updated, name), commit=True)

    def remove_archive(self, name):
        """Remove the named archive and all file rows that belong to it."""
        conn = self.conn()
        try:
            c = conn.cursor()
            # file.archive holds the archive name (see spider/update)
            c.execute('DELETE FROM file WHERE archive=?', (name, ))
            c.execute('DELETE FROM archive WHERE name=?', (name, ))
            conn.commit()
        finally:
            conn.close()
#web
def json_response(request, data):
    """Serialize data as indented JSON and set the response content type.

    request -- the twisted.web request being answered
    data    -- any JSON-serializable object

    Returns the JSON text to be used as the response body.
    """
    # setHeader is the public request API for response headers.
    request.setHeader('Content-Type', 'text/javascript')
    return json.dumps(data, indent=2)
class OxControl(Resource):
    """Web resource exposing the JSON API: /files, /update, /extract, /get.

    Children registered through putChild (currently only "media") are
    served normally; every other path is answered by render_GET.
    """
    #isLeaf = True

    def __init__(self, db_path):
        """Create the resource backed by the database file at db_path."""
        self.db_path = db_path
        # per-instance list; a class-level list would be shared by all instances
        self._children = []
        Resource.__init__(self)
        self.db = Database(self.db_path)
        self.putChild("media", File(self.db.get('media_cache', 'media')))
        #FIXME: this is just for debugging
        if not 'Test' in self.db.archives():
            self.db.add_archive('0xdb.org', 'Test', '/media/2010/Movies')

    def putChild(self, name, child):
        """Register child under name and remember the name for getChild."""
        self._children.append(name)
        return Resource.putChild(self, name, child)

    def getChild(self, name, request):
        """Return registered children; all other paths resolve to self."""
        if name in self._children:
            return Resource.getChild(self, name, request)
        return self

    def render_GET(self, request):
        """Dispatch on request.path and return the response body."""
        if request.path == '/files':
            return self._render_files(request)
        if request.path == '/update':
            return self._render_update(request)
        if request.path == '/extract':
            return self._render_extract(request)
        if request.path == '/get':
            return self._render_get(request)
        return "<!DOCTYPE html><html>this is not for humans</html>"

    def _render_files(self, request):
        """/files with optional ?since=unixtimestamp: files by archive."""
        since = request.args.get("since", None)
        if since:
            try:
                since = float(since[0])
            except ValueError:
                return json_response(request, {'status': 'invalid since timestamp'})
        return json_response(request, self.db.files(since))

    def _render_update(self, request):
        """/update: check for new files in all known archives."""
        #update in another thread, this otherwise blocks web server
        thread.start_new_thread(self.db.update_archives, ())
        return json_response(request, {'status': 'ok'})

    def _render_extract(self, request):
        """/extract?oshash=..&media=stills|<profile>.webm[&retry=1]

        Queues the extraction if needed and reports its status together
        with the derivatives that are already available.
        """
        oshash = request.args.get("oshash", [None])[0]
        media = request.args.get("media", [None, ])[0]
        retry = request.args.get("retry", [None, ])[0]
        response = {'status': 'not enough data provided'}
        f = self.db.get_file(oshash)
        if not f:
            response = {'status': 'unkown oshash'}
        elif not 'duration' in f['info']:
            response = {'status': 'unkown format, can not extract data'}
        else:
            # name stays None when media is missing or not supported
            name = None
            if media == 'stills':
                name = '%s.png'%video_frame_positions(f['info']['duration'])[0]
            elif media and media.endswith('.webm'):
                profile = media[:-5]
                if profile in VIDEO_PROFILES:
                    name = media
                else:
                    response = {'status': 'unsupported video profile requested'}
            if name:
                #get or create derivative
                derivative = self.db.derivative(oshash, name)
                if derivative['status'] == STATUS_FAILED and retry:
                    derivative = self.db.derivative(oshash, name, STATUS_NEW)
                response['status'] = {
                    STATUS_NEW: 'extracting',
                    STATUS_EXTRACTING: 'extracting',
                    STATUS_AVAILABLE: 'available',
                    STATUS_FAILED: 'failed',
                }.get(derivative['status'], 'extracting')
                if derivative['status'] == STATUS_NEW:
                    self.db.extract.put((oshash, name))
                locations = [d['location'] for d in self.db.derivatives(oshash)]
                if media == 'stills':
                    response['stills'] = [l for l in locations if l.endswith('.png')]
                else:
                    response['video'] = [l for l in locations if l.endswith('.webm')]
        return json_response(request, response)

    def _render_get(self, request):
        """/get?oshash=..: file info plus locations of its derivatives."""
        oshash = request.args.get("oshash", [None, ])[0]
        response = {'status': 'no oshash provided'}
        if oshash:
            f = self.db.get_file(oshash)
            if not f:
                # same answer /extract gives for an unknown file
                response = {'status': 'unkown oshash'}
            else:
                response['status'] = 'available'
                response['info'] = f['info']
                locations = [d['location'] for d in self.db.derivatives(oshash)]
                response['video'] = [l for l in locations if l.endswith('.webm')]
                response['stills'] = [l for l in locations if l.endswith('.png')]
        return json_response(request, response)
if __name__ == '__main__':
    # Script entry point: serve the API behind HTTP digest authentication.
    db = 'dev.sqlite'
    port = 2620
    username = 'fix'
    password = 'me'
    # '127.0.0.1' and '10.26.20.10' were earlier candidates; the value in
    # effect binds to all interfaces.
    interface = '0.0.0.0'
    print('http://%s:%d/' % (interface, port))

    root = OxControl(db)

    checker = InMemoryUsernamePasswordDatabaseDontUse()
    checker.addUser(username, password)

    class PublicHTMLRealm(object):
        # Realm that hands the single root resource to every authenticated user.
        implements(IRealm)

        def requestAvatar(self, avatarId, mind, *interfaces):
            if IResource not in interfaces:
                raise NotImplementedError()
            return (IResource, root, lambda: None)

    portal = Portal(PublicHTMLRealm(), [checker])
    credentialFactory = DigestCredentialFactory("md5", "oxbackend")
    resource = HTTPAuthSessionWrapper(portal, [credentialFactory])
    site = server.Site(resource)
    reactor.listenTCP(port, site, interface=interface)
    reactor.run()