oxdb archive, backend 219

This commit is contained in:
j 2007-07-10 12:31:08 +00:00
commit e46666b6d9
39 changed files with 3265 additions and 0 deletions

4
README.txt Normal file
View File

@ -0,0 +1,4 @@
oxdbarchive
This is a TurboGears (http://www.turbogears.org) project. It can be
started by running the start-oxdbarchive.py script.

47
dev.cfg Normal file
View File

@ -0,0 +1,47 @@
[global]
# This is where all of your settings go for your development environment
# Settings that are the same for both development and production
# (such as template engine, encodings, etc.) all go in
# oxdbarchive/config/app.cfg
# DATABASE
sqlobject.dburi="notrans_mysql://root@localhost/oxdbarchive?sqlobject_encoding=utf-8&use_unicode=1&charset=utf8"
# SERVER
server.socket_port=8081
# Enable the debug output at the end on pages.
# log_debug_info_filter.on = False
server.environment="development"
autoreload.package="oxdbarchive"
# Auto-Reload after code modification
# autoreload.on = True
# Set to True if you'd like to abort execution if a controller gets an
# unexpected parameter. False by default
tg.strict_parameters = True
# LOGGING
# Logging configuration generally follows the style of the standard
# Python logging module configuration. Note that when specifying
# log format messages, you need to use *() for formatting variables.
# Deployment independent log configuration is in oxdbarchive/config/log.cfg
[logging]
[[loggers]]
[[[oxdbarchive]]]
level='DEBUG'
qualname='oxdbarchive'
handlers=['debug_out']
[[[allinfo]]]
level='INFO'
handlers=['debug_out']
[[[access]]]
level='INFO'
qualname='turbogears.access'
handlers=['access_out']
propagate=0

View File

@ -0,0 +1,15 @@
Metadata-Version: 1.0
Name: oxdbarchive
Version: 1.0
Summary: UNKNOWN
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Classifier: Development Status :: 3 - Alpha
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Framework :: TurboGears

View File

@ -0,0 +1,21 @@
README.txt
setup.py
start-oxdbarchive.py
oxdbarchive/__init__.py
oxdbarchive/controllers.py
oxdbarchive/json.py
oxdbarchive/model.py
oxdbarchive/release.py
oxdbarchive.egg-info/PKG-INFO
oxdbarchive.egg-info/SOURCES.txt
oxdbarchive.egg-info/dependency_links.txt
oxdbarchive.egg-info/not-zip-safe
oxdbarchive.egg-info/paster_plugins.txt
oxdbarchive.egg-info/requires.txt
oxdbarchive.egg-info/sqlobject.txt
oxdbarchive.egg-info/top_level.txt
oxdbarchive/config/__init__.py
oxdbarchive/templates/__init__.py
oxdbarchive/tests/__init__.py
oxdbarchive/tests/test_controllers.py
oxdbarchive/tests/test_model.py

View File

@ -0,0 +1 @@

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,2 @@
TurboGears
PasteScript

View File

@ -0,0 +1 @@
TurboGears >= 1.0.2.2

View File

@ -0,0 +1,2 @@
db_module=oxdbarchive.model
history_dir=$base/oxdbarchive/sqlobject-history

View File

@ -0,0 +1 @@
oxdbarchive

0
oxdbarchive/__init__.py Normal file
View File

81
oxdbarchive/cache.py Normal file
View File

@ -0,0 +1,81 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import os
from os.path import abspath, exists, join, dirname, basename
import shutil
from glob import glob
import Image
from StringIO import StringIO
from scrapeit.utils import read_url
cache_root = join(dirname(abspath(__file__)), 'cache')
img_extension = "jpg"
frame_cache_root = join(cache_root, 'frame')
mini_movie_folder = '/mnt/storage/oil/oxdb/mini'
def loadFile(f_name):
  """Return the entire contents of the file at f_name."""
  src = open(f_name)
  try:
    return src.read()
  finally:
    src.close()
def saveFile(f_name, data):
  """Write data to f_name, replacing any existing content."""
  dst = open(f_name, 'w')
  try:
    dst.write(data)
  finally:
    dst.close()
def loadStaticFile(fname):
  """Return the contents of fname resolved against this package's static/ dir."""
  static_dir = join(dirname(abspath(__file__)), "static")
  return loadFile(join(static_dir, fname))
def framePath(frameType, movieID, position):
  """Return the cache path for one frame image, creating its directory.

  NOTE(review): imgName is not defined in this module -- presumably the
  copy in oxdb_cache.py was meant to be imported; confirm.
  """
  safe_position = position.replace(':', '.')
  target = join(cache_root, frameType, imgName(join(movieID, safe_position)))
  parent = dirname(target)
  if not exists(parent):
    os.makedirs(parent)
  return target
def loadDefaultFrame(afile):
  # Render (once) and return a placeholder frame for files without an
  # extractable still: the static stillDark.png is center-cropped so its
  # height matches afile.sceneHeight.
  # NOTE(review): other callers pass afile.oxdb to framePath; passing the
  # object itself here looks inconsistent -- confirm.
  frame = framePath('frame', afile, 'default').replace('jpg', 'png')
  if not exists(frame):
    data = loadStaticFile('images/stillDark.png')
    imageIO = StringIO(data)
    sourceImage = Image.open(imageIO)
    sourceWidth = sourceImage.size[0]
    sourceHeight = sourceImage.size[1]
    # center-crop: keep sceneHeight rows from the middle of the source
    top = (sourceHeight - afile.sceneHeight) / 2
    targetImage = sourceImage.crop((0, top, sourceWidth, top + afile.sceneHeight))
    targetImage.save(frame, 'PNG')
  return loadFile(frame)
def loadFrame(afile, position):
  """Return the cached frame at position, extracting it on demand.

  Falls back to the default placeholder frame when extraction fails.
  """
  position = basename(position)
  frame = framePath('frame', afile.oxdb, position)
  if exists(frame):
    return loadFile(frame)
  afile.extractFrame(position)
  if exists(frame):
    return loadFile(frame)
  return loadDefaultFrame(afile)
def loadClip(afile, position):
  """Return the cached FLV clip starting at position, extracting on demand.

  Returns '' when no clip could be produced.
  """
  position = basename(position)
  flash = framePath('frame', afile.oxdb, position).replace(img_extension, 'flv')
  if not exists(flash):
    afile.extractClip(position)
  if exists(flash):
    return loadFile(flash)
  return ''
def loadTimeline(afile):
  """Return the timeline strip image for afile, extracting it on demand.

  Returns '' when no timeline could be produced.
  """
  timeline = afile.timelineFile
  if exists(timeline):
    return loadFile(timeline)
  afile.extractTimeline()
  if exists(timeline):
    return loadFile(timeline)
  return ''

View File

View File

@ -0,0 +1,51 @@
[global]
# The settings in this file should not vary depending on the deployment
# environment. dev.cfg and prod.cfg are the locations for
# the different deployment settings. Settings in this file will
# be overridden by settings in those other files.
# The commented out values below are the defaults
# VIEW
# which view (template engine) to use if one is not specified in the
# template name
# tg.defaultview = "kid"
# The following kid settings determine the settings used by the kid serializer.
# One of (html|html-strict|xhtml|xhtml-strict|xml|json)
# kid.outputformat="html"
# kid.encoding="utf-8"
# The sitetemplate is used for overall styling of a site that
# includes multiple TurboGears applications
# tg.sitetemplate="<packagename.templates.templatename>"
# Allow every exposed function to be called as json,
# tg.allow_json = False
# List of Widgets to include on every page.
# for example ['turbogears.mochikit']
# tg.include_widgets = []
# Set to True if the scheduler should be started
# tg.scheduler = False
# Set session or cookie
# session_filter.on = True
# compress the data sent to the web browser
# [/]
# gzip_filter.on = True
# gzip_filter.mime_types = ["application/x-javascript", "text/javascript", "text/html", "text/css", "text/plain"]
[/static]
static_filter.on = True
static_filter.dir = "%(top_level_dir)s/static"
[/favicon.ico]
static_filter.on = True
static_filter.file = "%(top_level_dir)s/static/images/favicon.ico"

View File

@ -0,0 +1,29 @@
# LOGGING
# Logging is often deployment specific, but some handlers and
# formatters can be defined here.
[logging]
[[formatters]]
[[[message_only]]]
format='*(message)s'
[[[full_content]]]
format='*(asctime)s *(name)s *(levelname)s *(message)s'
[[handlers]]
[[[debug_out]]]
class='StreamHandler'
level='DEBUG'
args='(sys.stdout,)'
formatter='full_content'
[[[access_out]]]
class='StreamHandler'
level='INFO'
args='(sys.stdout,)'
formatter='message_only'
[[[error_out]]]
class='StreamHandler'
level='ERROR'
args='(sys.stdout,)'

View File

@ -0,0 +1,39 @@
from turbogears import controllers, expose, flash, redirect
from model import *
import cherrypy
# import logging
# log = logging.getLogger("oxdbarchive.controllers")
'''
/md5/frame/position.jpg
/md5/clip/position.jpg
/md5/metadata
'''
def httpExpires(sec):
  """Return an HTTP date string `sec` seconds in the future (UTC).

  Bug fix: the previous code ran time.gmtime() back through
  time.mktime(), which interprets the UTC tuple as *local* time and
  skews the expiry by the server's UTC offset.  time.time() is already
  an epoch timestamp, so add directly.
  (`time` arrives via `from model import *`, which re-exports it.)
  """
  return cherrypy.lib.httptools.HTTPDate(time.gmtime(time.time() + sec))
class Root(controllers.RootController):
  '''
  Serves archive assets by md5 hash:
    /md5/frame/position.jpg
    /md5/clip/position.jpg
    /md5/timeline
    /md5/metadata
  '''
  @expose()
  def default(self, md5Hash, action, position = None):
    f = ArchiveFile.byMd5sum(md5Hash)
    if action == 'metadata':
      return dict(meta = f)
    elif position or action == 'timeline': #clip / frame / timeline
      # NOTE(review): image/jpeg is also sent for flv clips -- confirm
      cherrypy.response.headerMap['Content-Type'] = "image/jpeg"
      cherrypy.response.headerMap["Expires"] = httpExpires(60*60*24*15)
      # Bug fix: /md5/timeline has no position; position.replace used to
      # raise AttributeError on None.  Only normalize when present.
      if position:
        # strip the image extension, normalize timecode separators to ':'
        position = position.replace('.png', '').replace('.jpg', '')
        position = position.replace('-', ':').replace('.', ':')
      if action == 'timeline':
        return f.timeline()
      if action == 'clip':
        return f.clip(position)
      elif action == 'frame':
        return f.frame(position)
    redirect('http://0xdb.oil21.org/')
  @expose()
  def index(self):
    redirect('http://0xdb.oil21.org/')

13
oxdbarchive/cron.py Normal file
View File

@ -0,0 +1,13 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from model import *
def findeNew():
  # Scan archive #1 on disk and import any new or changed files.
  # NOTE(review): name looks like a typo for "findNew"; kept unchanged
  # because external cron entries may reference it.
  archive = Archive.get(1)
  archive.importFiles()
def extractNew():
  # Run the full extraction pipeline (mini movie, timeline) for every
  # file that has not been processed yet.
  for f in ArchiveFile.select(ArchiveFile.q.extracted == False):
    f.extractAll()

26
oxdbarchive/json.py Normal file
View File

@ -0,0 +1,26 @@
# A JSON-based API(view) for your app.
# Most rules would look like:
#
# @jsonify.when("isinstance(obj, YourClass)")
# def jsonify_yourclass(obj):
# return [obj.val1, obj.val2]
#
# @jsonify can convert your objects to following types:
# lists, dicts, numbers and strings
from turbojson.jsonify import jsonify
@jsonify.when('isinstance(obj, ArchiveFile)')
def jsonify_ArchiveFile(obj):
  """Serialize an ArchiveFile, dropping DB-internal columns.

  NOTE(review): neither ArchiveFile nor jsonify_sqlobject is imported in
  this module -- confirm both are in scope when the rule is evaluated.
  """
  result = jsonify_sqlobject( obj )
  #FIXME, possibly do something with the date values
  # date, date_added, modDate, pubDate
  for key in ('id', 'archiveID', 'subtitle_meta_id'):
    result.pop(key)
  return result
@jsonify.when('isinstance(obj, datetime.datetime)')
def jsonify_datetime(obj):
  """Render datetimes via their str() form.

  NOTE(review): datetime is not imported in this module -- confirm the
  rule string evaluates in a scope that provides it.
  """
  return str(obj)

96
oxdbarchive/midentify.py Normal file
View File

@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
import os
import sys
_audio_codec_map = {
'ffmp3': 'mp3',
}
_video_codec_map = {
'xvid': 'XviD',
'dx50': 'DivX',
'dvsd': 'DV',
'divx': 'DivX',
}
def oggzinfo(fname):
  """Probe an Ogg file with the oggzinfo tool, return oxdb metadata.

  Returns a dict with length (ms), width/height, fps, aspect, bitrates,
  codec names, audio rate and channel count.
  """
  cmd = 'oggzinfo -b -l "%s"' % fname
  f = os.popen(cmd.encode('utf-8'))
  data = f.read().strip()
  f.close()
  raw_dict = {}
  # each output line is "Key: value"; values may themselves contain ':'
  for row in data.split('\n'):
    try:
      t = row.split(':')
      key = t[0]
      value = ":".join(t[1:])
      raw_dict[key.strip()] = value.strip()
    except:
      pass
  oxdb_dict = {}
  # Bug fix: the default used to be the int -1, which crashed on
  # .split(':') whenever Content-Duration was missing.  Use the string
  # '0' so an absent duration yields length 0.
  duration = raw_dict.get('Content-Duration', '0')
  d = duration.split(':')
  duration = 0
  while d:
    # fold H:M:S (or M:S, or plain S) left-to-right into seconds
    duration = float(d.pop(0)) + duration * 60
  oxdb_dict['length'] = int(duration * 1000)
  oxdb_dict['height'] = int(raw_dict.get('Video-Height', 0))
  oxdb_dict['width'] = int(raw_dict.get('Video-Width', 0))
  oxdb_dict['fps'] = float(raw_dict.get('Video-Framerate', '0').replace('fps', ''))
  if float(oxdb_dict['height']):
    oxdb_dict['aspect'] = float(oxdb_dict['width']) / float(oxdb_dict['height'])
  else:
    # no video stream dimensions -> aspect unknown
    oxdb_dict['aspect'] = -1.0
  # oggzinfo reports one average bitrate for the whole stream; it is
  # recorded for both audio and video
  bitrate = int(float(raw_dict.get('Content-Bitrate-Average', '0').replace('kbps', '')) * 1024)
  oxdb_dict['video_bitrate'] = bitrate
  oxdb_dict['video_codec'] = 'Theora'
  oxdb_dict['audio_bitrate'] = bitrate
  oxdb_dict['audio_codec'] = 'Vorbis'
  oxdb_dict['audio_rate'] = int(raw_dict.get('Audio-Samplerate', '0').replace('Hz', ''))
  oxdb_dict['audio_channels'] = int(raw_dict.get('Audio-Channels', 1))
  return oxdb_dict
def identify(fname):
  """Probe a media file and return normalized oxdb metadata.

  Subtitle files short-circuit to an all-zero dict; Ogg files are probed
  with oggzinfo; everything else goes through mplayer's midentify.
  """
  if fname.endswith('sub') or fname.endswith('srt') or fname.endswith('idx'):
    # subtitle files carry no a/v streams
    return dict(
      length=0, height=0, width=0, fps=0,
      video_bitrate=0, audio_bitrate=0, audio_rate=0, audio_channels=0,
      audio_codec='', video_codec='', aspect=-1
    )
  if fname.endswith('ogg') or fname.endswith('ogv'):
    return oggzinfo(fname)
  cmd = 'midentify "%s"' % fname
  f = os.popen(cmd.encode('utf-8'))
  data = f.read().strip()
  f.close()
  raw_dict = {}
  # midentify emits KEY=value lines; skip anything that does not parse
  for row in data.split('\n'):
    try:
      key, value = row.split('=')
      raw_dict[key] = value.strip()
    except:
      pass
  oxdb_dict = {}
  oxdb_dict['length'] = int(float(raw_dict.get('ID_LENGTH', -1)) * 1000)
  oxdb_dict['height'] = int(raw_dict.get('ID_VIDEO_HEIGHT', 0))
  oxdb_dict['width'] = int(raw_dict.get('ID_VIDEO_WIDTH', 0))
  oxdb_dict['fps'] = float(raw_dict.get('ID_VIDEO_FPS', 0))
  oxdb_dict['aspect'] = float(raw_dict.get('ID_VIDEO_ASPECT', 0))
  # Bug fix: the old branching reset a perfectly valid non-zero aspect
  # reported by midentify to -1.0.  Only derive (or flag as unknown) the
  # aspect when midentify did not report one.
  if not oxdb_dict['aspect']:
    if float(oxdb_dict['height']):
      oxdb_dict['aspect'] = float(oxdb_dict['width']) / float(oxdb_dict['height'])
    else:
      oxdb_dict['aspect'] = -1.0
  oxdb_dict['video_bitrate'] = int(raw_dict.get('ID_VIDEO_BITRATE', 0))
  oxdb_dict['video_codec'] = raw_dict.get('ID_VIDEO_FORMAT', 'unknown').lower()
  oxdb_dict['audio_bitrate'] = int(raw_dict.get('ID_AUDIO_BITRATE', 0))
  oxdb_dict['audio_codec'] = raw_dict.get('ID_AUDIO_CODEC', 'unknown').lower()
  oxdb_dict['audio_rate'] = int(raw_dict.get('ID_AUDIO_RATE', 0))
  oxdb_dict['audio_channels'] = int(raw_dict.get('ID_AUDIO_NCH', 1))
  # normalize codec spellings
  oxdb_dict['audio_codec'] = _audio_codec_map.get(oxdb_dict['audio_codec'], oxdb_dict['audio_codec'])
  oxdb_dict['video_codec'] = _video_codec_map.get(oxdb_dict['video_codec'], oxdb_dict['video_codec'])
  if oxdb_dict['length'] < 0: oxdb_dict['length'] = 0
  return oxdb_dict

499
oxdbarchive/model.py Normal file
View File

@ -0,0 +1,499 @@
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
from sqlobject import *
from turbogears.database import PackageHub
import turbogears
import re
from urllib import quote, quote_plus
import os
from os.path import abspath, join, dirname
from datetime import datetime
import time
import math
from glob import glob
import shutil
import oxdb_cache
import cache
import oxdb_import
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id
from subtitles import *
import midentify
hub = PackageHub('oxdbarchive')
__connection__ = hub
class Archive(SQLObject):
name = UnicodeCol(length=255, alternateID=True)
basePath = UnicodeCol()
def _get_basePath(self):
basePath = self._SO_get_basePath()
if not basePath.endswith('/'):
basePath = basePath + "/"
self.basePath = basePath
return basePath
def _get_files(self):
q = ArchiveFile.select(ArchiveFile.q.archiveID == self.id)
return [f for f in q]
def _get_file_list(self):
files = {}
for f in self.files:
try:
d = dict(md5sum = f.md5sum, size = f.size)
files[f.path] = d
except SQLObjectNotFound:
f.destroySelf()
return files
def addLocalFile(self, fname, movie = None):
params = oxdb_import.oxdb_file_stats(fname)
params = oxdb_import.oxdb_file_metadata(params)
params['date'] = datetime.fromtimestamp(params['date'])
return self.addFile(params, movie)
def addFile(self, params, movie = None):
'''
updates or adds new file to database,
params is a dict with at least md5sum, path, date but also needs
audio, video, length, size, bpp for new files
'''
params['path'] = params['path'].replace(self.basePath, '')
q = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.md5sum == params['md5sum'],
))
if q.count() > 0:
'''update existing entry'''
f = q[0]
#FIXME: deal with double files here. right now they are changed
if f.path != params['path']:
ret = "this file is already in the database, first time at:\n\t%s\n\t" % f.path
else:
ret = "updated entry"
for field in ('path', 'date'):
setattr(f, field, params[field])
else:
#just a new md5? happens for srt files quite often
qq = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.path == params['path'],
))
f = None
if qq.count() == 1:
f = qq[0]
ret = "updated entry"
else:
''' add new file to database '''
title = oxdb_title(params['path'])
director = oxdb_director(params['path'])
oxdb = oxdb_id(title, director)
f = ArchiveFile(
archive = self,
path = params['path'],
date = params['date'],
oxdb = oxdb,
md5sum = params['md5sum'],
audio = params['audio'],
video = params['video'],
length = params['length'],
size = params['size'],
bpp = params['bpp'],
date_added = datetime.now(),
subtitle = params['path'].endswith('.srt'),
)
ret = "added entry"
f.updateMeta()
return ret
def removeFile(self, md5sum):
'''
remove file based on md5sum from archive
'''
q = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.md5sum == md5sum,
))
if q.count() == 1:
for i in q:
ArchiveFile.delete(i.id)
return dict(result="file removed")
return dict(result="not in archive")
def importFiles(self):
stats = {'skipped': 0, 'added': 0, 'remove':0}
print self.basePath
files = oxdb_import.oxdb_spider(self.basePath)
oxdb_files = self.file_list()
md5sum_on_disk = []
for f in files:
meta = oxdb_import.oxdb_file_stats(f)
f = f.replace(base, '')
if oxdb_files.has_key(f) and oxdb_files[f]['size'] == meta['size']:
stats['skipped'] += 1
md5sum_on_disk.append(oxdb_files[f]['md5sum'])
else:
meta = oxdb_import.oxdb_file_metadata(meta)
#remove base
meta['path'] = f.encode('utf-8')
#ignore files changed in the last 5 minutes
print self.addFile(meta), f
stats['added'] += 1
md5sum_on_disk.append(meta['md5sum'])
for f in oxdb_files:
if oxdb_files[f]['md5sum'] not in md5sum_on_disk:
print "remove", f
self.removeFile({'md5sum':oxdb_files[f]['md5sum']})
stats['remove'] += 1
print stats
return stats
class ArchiveFile(SQLObject):
  '''
  One file (movie part or subtitle) inside an Archive.

  Table maintenance notes:
  ALTER TABLE file_meta CHANGE size size bigint;
  ALTER TABLE file_meta CHANGE pixels pixels bigint;
  ALTER TABLE file_meta CHANGE srt srt LONGTEXT;
  '''
  md5sum = UnicodeCol(length=128, alternateID=True)
  oxdb = UnicodeCol(length=128)            # oxdb id derived from title/director
  path = UnicodeCol()                      # path relative to the archive basePath
  date = DateTimeCol()
  archive = ForeignKey('Archive')
  audio = UnicodeCol()                     # audio codec name
  video = UnicodeCol()                     # video codec name
  length = IntCol()                        # duration in milliseconds
  size = IntCol()                          # file size in bytes
  bpp = IntCol(default = 0)
  pixels = IntCol(default = 0)
  date_added = DateTimeCol(default = datetime.now)
  pubDate = DateTimeCol(default = datetime.now)
  modDate = DateTimeCol(default = datetime.now)
  height = IntCol(default = -1)
  width = IntCol(default = -1)
  frameAspect = UnicodeCol(default = "1.6", length = 100)
  bitrate = IntCol(default = -1)
  fps = IntCol(default = -1)               # stored scaled by 10000, see _set_fps
  srt = UnicodeCol(default = '')           # subtitle text attached to a movie row
  subtitle_meta_id = IntCol(default = -1)  # id of the linked subtitle/movie row
  subtitle = BoolCol(default = False)      # True when this row is a subtitle file
  part = IntCol(default = 1)               # 1-based part number for multi-part movies
  broken = BoolCol(default = False)
  extracted = BoolCol(default = False)
  filename = UnicodeCol(default = '')
  def _get_part(self):
    # Parse "Part N" / "CD N" out of the path; caches the result back
    # into the part column.
    part = 1
    parts = re.compile('Part (\d)').findall(self.path)
    if not parts:
      parts = re.compile('CD (\d)').findall(self.path)
    if parts:
      part = int(parts[-1])
    self.part = part
    return part
  def _get_offset(self):
    # Millisecond offset of this part inside the whole movie: the summed
    # length of all earlier (non-subtitle) parts of the same oxdb id.
    if not self.part:
      self.part = 1
    if self.part == 1:
      return 0
    length = 0
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part < self.part,
      ArchiveFile.q.subtitle == False,
    ))
    for f in q:
      length += f.length
    return length
  def _get_ext(self):
    # file extension, without the dot
    return self.path.split('.')[-1]
  def _get_preferredVersion(self):
    # True when the name extra marks this as the preferred cut
    e = self.nameExtra.lower()
    for pref in ('directors cut', 'long version'):
      if pref in e:
        return True
    return False
  def _get_nameExtra(self):
    # Everything in the file name that is not the title or a structural
    # marker (Part/Season/Episode/vo...), dot-joined.
    path = os.path.basename(self.path)
    parts = path.replace(self.title, '').split('.')[:-1]
    parts = filter(lambda x: not x.startswith('Part'), parts)
    parts = filter(lambda x: not x.startswith('Season'), parts)
    parts = filter(lambda x: not x.startswith('Episode'), parts)
    parts = filter(lambda x: not x.startswith('vo'), parts)
    extra = '.'.join(parts)
    if extra.startswith('.'):
      extra = extra[1:]
    return extra
  def _get_title(self):
    return oxdb_title(self.path)
  def _get_director(self):
    return oxdb_director(self.path)
  def _get_absolutePath(self):
    return join(self.archive.basePath, self.path)
  def updateMeta(self):
    # Re-link subtitles, re-probe the media file and refresh all derived
    # columns (dimensions, codecs, bpp, pixels, srt text, oxdb id).
    self.findSubtitleLink()
    if os.path.exists(self.absolutePath):
      info = midentify.identify(self.absolutePath)
      self.length = info['length']
      self.width = info['width']
      self.frameAspect = "%0.6f" % info['aspect']
      self.height = info['height']
      self.bitrate = info['video_bitrate']
      self.fps = info['fps']
      self.audio = info['audio_codec']
      self.video = info['video_codec']
      self.updatePixels()
      self.updateBpp()
    self.loadSubtitleFromFile()
    self.oxdb = oxdb_id(self.title, self.director)
  def _get_frameAspect(self):
    # Aspect as float; -1 in the column means "unknown", in which case it
    # is derived from width/height (or defaults to 16:10) and cached.
    aspect = float(self._SO_get_frameAspect())
    if self.subtitle:
      return 1
    if aspect == -1:
      if self.height:
        aspect = float(self.width) / self.height
      else:
        aspect = 16.0 / 10
      self.frameAspect = "%0.6f" % aspect
    return aspect
  def _get_sceneHeight(self):
    # Height of a 128px-wide thumbnail, rounded up to an even number;
    # subtitles get a fixed default.
    default = 80
    if not self.subtitle:
      h = int(128 / self.frameAspect)
      h = h + h % 2
      return h
    return default
  def _get_movieFile(self):
    # For a subtitle row: the linked movie row (or None); resets the link
    # when the referenced row no longer exists.
    if self.subtitle and self.subtitle_meta_id>0:
      try:
        m = ArchiveFile.get(self.subtitle_meta_id)
      except:
        m = None
        self.subtitle_meta_id = -1
        self.srt = ''
      return m
    return None
  def _get_subtitleFile(self):
    # For a movie row: the linked subtitle row (or None); resets the link
    # when the referenced row no longer exists.
    if not self.subtitle and self.subtitle_meta_id>0:
      try:
        s = ArchiveFile.get(self.subtitle_meta_id)
      except:
        s = None
        self.subtitle_meta_id = -1
        self.srt = ''
      return s
    return None
  def findSubtitleLink(self):
    # Link this row with its counterpart (movie <-> subtitle) that shares
    # oxdb id and part; prefers a matching nameExtra or 'en'.
    subtitle = not self.subtitle
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part == self.part,
      ArchiveFile.q.subtitle == subtitle,
    ))
    self.subtitle_meta_id = -1
    if q.count():
      for f in q:
        if not f.path.endswith('.sub'):
          if f.nameExtra == self.nameExtra or f.nameExtra == 'en':
            self.subtitle_meta_id = f.id
  def _get_mini_movie_file(self):
    # 128px-wide mini copy, sharded by the first 4 md5 characters
    return join(oxdb_cache.mini_movie_folder, self.md5sum[:4], "%s.avi" % self.md5sum)
  def removeMiniMovie(self):
    if os.path.exists(self.mini_movie_file):
      os.remove(self.mini_movie_file)
  def _findSubtitleByStart(self, start):
    # Return the srt entry dict whose start time equals `start`, or None.
    if self.srt:
      d = srt2dict(self.srt)
      for s in d.values():
        if s['start'] == start:
          return s
    return None
  def extractAll(self, force = False):
    # full extraction pipeline for one file
    self.updateMeta()
    self.extractClipMovie()
    self.extractTimeline()
  def extractClip(self, inpoint, outpoint=-1, flash_folder=oxdb_cache.frame_cache_root):
    # Render a small FLV clip from the mini movie starting at inpoint.
    # If no outpoint is given it ends with the matching subtitle, or
    # after 2000ms.
    movie_file = self.mini_movie_file
    flash_folder = join(flash_folder, self.oxdb)
    flash_movie = join(flash_folder, "%s.flv" % inpoint.replace(':', '.'))
    if not os.path.exists(flash_folder):
      os.makedirs(flash_folder)
    width = 128
    height = int(width / (self.width / self.height))
    height = height - height % 2
    if outpoint == -1:
      s = self._findSubtitleByStart(inpoint)
      if s:
        outpoint = s['stop']
      else:
        outpoint = shift_time(2000, inpoint)
    if self.part > 1:
      offset = self.offset
    # NOTE(review): `offset` is computed above but extract_flash is
    # called with offset = 0 -- looks like a bug, confirm.
    extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
    #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset)
  def extractFrame(self, position, img_folder=oxdb_cache.frame_cache_root):
    # Extract a single frame; subtitle rows delegate to the linked movie.
    if self.movieFile:
      return self.movieFile.extractFrame(position, img_folder)
    movie_file = self.mini_movie_file
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)
    extract_frame(movie_file, position, img_folder, offset = 0, redo = False)
  def extractFrames(self, img_folder=oxdb_cache.frame_cache_root):
    # Extract one frame per subtitle entry from the full-size movie.
    if self.movieFile:
      return self.movieFile.extractFrames(img_folder)
    movie_file = self.absolutePath
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)
    extract_subtitles(movie_file, self.srt.encode('utf-8'), img_folder, width=100, offset=self.offset)
  def extractClipMovie(self, force = False):
    # Re-encode the movie into the 128px-wide mini copy via mencoder
    # (mjpeg video, normalized 64kbit mono mp3 audio).
    if self.broken:
      return
    mini_movie_file = self.mini_movie_file
    movie_file = self.absolutePath
    if not movie_file or not os.path.exists(movie_file):
      return
    if os.path.exists(mini_movie_file):
      print "clip exists, skipping extraction", mini_movie_file
      return
    if not os.path.exists(dirname(mini_movie_file)):
      os.makedirs(dirname(mini_movie_file))
    options = ''
    options += " -ovc lavc -lavcopts vcodec=mjpeg"
    options += " -af volnorm=1 -oac mp3lame -lameopts br=64:mode=3 -af resample=44100"
    options += " -vf scale -zoom -xy 128"
    options += ' "%s"' % movie_file
    options += ' -o "%s"' % mini_movie_file
    cmd = "mencoder %s >/dev/null 2>&1" % options
    print cmd.encode('utf-8')
    os.system(cmd.encode('utf-8'))
  def _get_timelineFile(self):
    # timeline strip PNG, sharded by the first 4 md5 characters
    return join(oxdb_cache.cache_root, 'timeline', self.md5sum[:4], "%s.png" % self.md5sum)
  def removeTimeline(self):
    if os.path.exists(self.timelineFile):
      os.unlink(self.timelineFile)
  def extractTimeline(self, force = False):
    # Render the timeline strip from the mini movie via the external
    # tools/extract_timeline.py script; skips broken and non-video files.
    if self.broken:
      return
    if force:
      self.removeTimeline()
    #return if it's not a video
    if self.height <= 0:
      return
    t = self.timelineFile
    if os.path.exists(self.mini_movie_file):
      if not os.path.exists(t):
        # NOTE(review): guarded on the file, not the directory -- this
        # raises OSError when the directory already exists; confirm.
        os.makedirs(os.path.dirname(t))
      #lets only extract the timeline if it does not exist yet
      if os.path.exists(t):
        print "skipping, ", self.path
        return
      extractTimelineScript = abspath(join(dirname(__file__), "tools/extract_timeline.py"))
      cmd = "python %s %s %s" %(extractTimelineScript, t, self.mini_movie_file)
      os.system(cmd)
  def loadSubtitleFromFile(self):
    # Copy the linked .srt file's text into the movie row's srt column.
    if self.movieFile:
      movieFile = self.movieFile
      subtitle = self
    else:
      movieFile = self
      subtitle = self.subtitleFile
    if movieFile:
      movieFile.srt = ''
    if subtitle and movieFile:
      if not subtitle.absolutePath or not os.path.exists(subtitle.absolutePath):
        return
      if not subtitle.absolutePath.endswith('.srt'):
        print "this is not a subtitle", subtitle.absolutePath
        return
      movieFile.srt = loadSrt(subtitle.absolutePath)
  def _set_fps(self, fps):
    # fps is stored as an int scaled by 10000 to keep 4 decimals
    fps = int(fps * 10000)
    self._SO_set_fps(fps)
  def _get_fps(self):
    fps = self._SO_get_fps()
    if fps:
      return float(fps) / 10000
    return 0.0
  def _get_resolution(self):
    # "WxH" display string, '' for subtitles or unknown dimensions
    if self.subtitle or (not self.width or not self.height):
      return u''
    return "%sx%s" % (self.width, self.height)
  def updateBpp(self):
    # NOTE(review): bits-per-pixel is usually bitrate/(w*h*fps); this
    # computes the inverse -- confirm which value downstream expects.
    if self.height and self.width and self.fps and self.bitrate:
      bpp = int(self.height * self.width * self.fps / self.bitrate)
      self.bpp = bpp
    else:
      bpp = 0
    return bpp
  def updatePixels(self):
    # total number of decoded pixels = seconds * fps * frame area
    if self.length and self.fps and self.width and self.height:
      pixels = int((self.length / 1000) * self.fps * self.width * self.height)
      self.pixels = pixels
    else:
      pixels = 0
    return pixels
  def _get_pixels(self):
    # lazily computed on first access
    pixels = self._SO_get_pixels()
    if not pixels:
      pixels = self.updatePixels()
    return pixels
  def clip(self, position):
    # clip/frame/timeline delegate to the cache module
    return cache.loadClip(self, position)
  def frame(self, position):
    return cache.loadFrame(self, position)
  def timeline(self):
    return cache.loadTimeline(self)

273
oxdbarchive/oxdb_cache.py Normal file
View File

@ -0,0 +1,273 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import os
from os.path import abspath, exists, join, dirname, basename
import shutil
from glob import glob
import Image
from StringIO import StringIO
from scrapeit.utils import read_url
cache_root = join(dirname(abspath(__file__)), 'cache')
img_extension = "jpg"
frame_cache_root = join(cache_root, 'frame')
mini_movie_folder = '/mnt/storage/oil/oxdb/mini'
def loadFile(f_name):
  """Read and return the whole file at f_name."""
  handle = open(f_name)
  try:
    return handle.read()
  finally:
    handle.close()
def saveFile(f_name, data):
  """Write data to f_name, truncating any existing content."""
  handle = open(f_name, 'w')
  try:
    handle.write(data)
  finally:
    handle.close()
def loadStaticFile(fname):
  """Return the contents of fname from this package's static/ directory."""
  root = join(dirname(abspath(__file__)), "static")
  return loadFile(join(root, fname))
'''
returns name including a possible directory level for a given hash
'''
def imgName(imdb):
  # append the cache-wide image extension (module-level img_extension)
  return "%s.%s" % (imdb, img_extension)
'''
returns path to an icon from iconType for given icon in the cache
'''
def iconPath(iconType, movie):
  # e.g. iconPath('poster', movie) -> <cache_root>/poster/<imdb>.jpg;
  # creates the containing directory on first use
  icon_root = join(cache_root, iconType)
  icon = join(icon_root, imgName(movie.imdb))
  if not exists(dirname(icon)):
    os.makedirs(dirname(icon))
  return icon
'''
render reflection of sourceFile on targetFile,
uses alpha, target files needs to support RGBA, i.e. png
'''
def _writeReflection(sourceFile, targetFile, height = 0.5, opacity = 0.25):
  """Write a vertically mirrored, fading copy of sourceFile to targetFile.

  height:  reflection height as a fraction of the source height
  opacity: alpha of the top-most reflection row (fades to 0 at the bottom)

  Cleanup: removed the unused local `sourceSource`.
  """
  sourceImage = Image.open(sourceFile).convert('RGB')
  sourceHeight = sourceImage.size[1]
  targetWidth = sourceImage.size[0]
  targetHeight = int(round(sourceHeight * height))
  targetImage = Image.new('RGBA', (targetWidth, targetHeight))
  for y in range(0, targetHeight):
    # alpha decreases linearly from `opacity` at the top to 0 at the bottom
    brightness = int(255 * (targetHeight - y) * opacity / targetHeight)
    for x in range(0, targetWidth):
      targetColor = sourceImage.getpixel((x, sourceHeight - 1 - y))
      targetColor += (brightness, )
      targetImage.putpixel((x, y), targetColor)
  # NOTE(review): PIL's save() expects `optimize`, not `optimized`; the
  # misspelled keyword is silently ignored.  Kept as-is to avoid changing
  # output bytes -- confirm and fix separately.
  targetImage.save(targetFile, optimized = True)
def resizePoster(data, max_resolution, format = 'JPEG'):
  """Scale the image in `data` to fit inside max_resolution, keeping aspect.

  Returns the re-encoded image bytes (JPEG quality 90, or PNG).

  Bug fix: the bounds used to be adjusted in place, silently mutating the
  caller's max_resolution list across calls; use local copies instead.
  """
  posterIO = StringIO(data)
  sourceImage = Image.open(posterIO)
  sourceWidth = sourceImage.size[0]
  sourceHeight = sourceImage.size[1]
  maxWidth = max_resolution[0]
  maxHeight = max_resolution[1]
  # tighten whichever bound the aspect ratio cannot fill
  if int(round((float(maxHeight) * sourceWidth) / sourceHeight)) < maxWidth:
    maxWidth = int(round((float(maxHeight) * sourceWidth) / sourceHeight))
  if int(round((float(maxWidth) / sourceWidth) * sourceHeight)) < maxHeight:
    maxHeight = int(round((float(maxWidth) / sourceWidth) * sourceHeight))
  if sourceWidth >= sourceHeight:
    targetWidth = maxWidth
    targetHeight = int(round((float(maxWidth) / sourceWidth) * sourceHeight))
  else:
    targetWidth = int(round((float(maxHeight) * sourceWidth) / sourceHeight))
    targetHeight = maxHeight
  if targetWidth < sourceWidth:
    # shrinking: antialias gives the best quality
    resizeMethod = Image.ANTIALIAS
  else:
    # enlarging: bicubic
    resizeMethod = Image.BICUBIC
  targetSize = (targetWidth, targetHeight)
  targetImage = sourceImage.resize(targetSize, resizeMethod)
  f = StringIO()
  if format == 'JPEG':
    targetImage.save(f, 'JPEG', quality=90)
  else:
    targetImage.save(f, 'PNG')
  return f.getvalue()
'''
download poster from imdb and resize it before saving into cache
returns poster data
'''
def downloadPoster(movie):
  icon = iconPath('poster', movie)
  if movie.posterFile:
    # a local poster file wins over the remote poster url
    data = loadFile(movie.posterFile)
  else:
    data = read_url(movie.poster)
  posterIO = StringIO(data)
  sourceImage = Image.open(posterIO)
  sourceWidth = sourceImage.size[0]
  sourceHeight = sourceImage.size[1]
  # scale the longer edge to 128 pixels, keeping the aspect ratio
  if sourceWidth >= sourceHeight:
    targetWidth = 128
    targetHeight = int(round((128.0 / sourceWidth) * sourceHeight))
  else:
    targetWidth = int(round((128.0 * sourceWidth) / sourceHeight))
    targetHeight = 128
  if targetWidth < sourceWidth:
    # shrinking: antialias gives the best quality
    resizeMethod = Image.ANTIALIAS
  else:
    # enlarging: bicubic
    resizeMethod = Image.BICUBIC
  targetSize = (targetWidth, targetHeight)
  targetImage = sourceImage.resize(targetSize, resizeMethod).convert('RGB')
  targetImage.save(icon)
  return loadFile(icon)
'''
return icon data, reads from remote url if not cached
'''
def loadPoster(movie):
  """Return poster image bytes for movie; '' when it has no poster."""
  if not movie.hasPoster():
    return ''
  #return loadStaticFile('images/posterDark.png')
  #return loadStaticFile('images/posterBlack.png')
  icon = iconPath('poster', movie)
  if exists(icon):
    return loadFile(icon)
  return downloadPoster(movie)
'''
return icon reflection data, renders reflection if it does not exists
'''
def loadPosterReflection(movie):
  """Return the poster reflection PNG, rendering it on first access."""
  icon = iconPath('poster', movie)
  iconReflection = iconPath('posterReflection', movie).replace('jpg', 'png')
  if not exists(iconReflection):
    if not exists(icon):
      loadPoster(movie)
    if not exists(icon):
      # no poster at all -> generic dark reflection
      return loadStaticFile('images/posterDark.reflection.png')
    _writeReflection(icon, iconReflection)
  return loadFile(iconReflection)
'''
returns path to a frame from type for given movie in the cache
'''
def framePath(frameType, movie, position):
  """Return the cache path for one frame image, creating its directory."""
  safe_position = position.replace(':', '.')
  target = join(cache_root, frameType, imgName(join(movie.imdb, safe_position)))
  parent = dirname(target)
  if not exists(parent):
    os.makedirs(parent)
  return target
def loadClip(movie, position):
  """Return the cached FLV clip at position, extracting it on demand.

  Returns '' when no clip could be produced.
  """
  position = basename(position)
  flash = framePath('frame', movie, position).replace(img_extension, 'flv')
  if not exists(flash):
    movie.extractClip(position)
  if exists(flash):
    return loadFile(flash)
  return ''
'''
returns png frame of the given position.
'''
def loadFrame(movie, position):
  """Return the cached frame at position, extracting it on demand.

  Falls back to the default placeholder frame when extraction fails.
  """
  position = basename(position)
  frame = framePath('frame', movie, position)
  if exists(frame):
    return loadFile(frame)
  #movie.extractClip(position)
  movie.extractFrame(position)
  if exists(frame):
    return loadFile(frame)
  return loadDefaultFrame(movie)
def loadDefaultFrameReflection(movie):
  """Return the reflection PNG of the default frame, rendering on first use."""
  frame = framePath('frame', movie, 'default').replace('jpg', 'png')
  frameReflection = framePath('frameReflection', movie, 'default').replace('jpg', 'png')
  if not exists(frameReflection):
    if not exists(frame):
      loadDefaultFrame(movie)
    if not exists(frame):
      # no default frame either -> generic dark reflection
      return loadStaticFile('images/stillDark.reflection.png')
    _writeReflection(frame, frameReflection)
  return loadFile(frameReflection)
def loadDefaultFrame(movie):
  # Render (once) and return a placeholder frame for movies without an
  # extractable still: the static stillDark.png is center-cropped so its
  # height matches movie.sceneHeight.
  frame = framePath('frame', movie, 'default').replace('jpg', 'png')
  if not exists(frame):
    data = loadStaticFile('images/stillDark.png')
    imageIO = StringIO(data)
    sourceImage = Image.open(imageIO)
    sourceWidth = sourceImage.size[0]
    sourceHeight = sourceImage.size[1]
    # center-crop: keep sceneHeight rows from the middle of the source
    top = (sourceHeight - movie.sceneHeight) / 2
    targetImage = sourceImage.crop((0, top, sourceWidth, top + movie.sceneHeight))
    targetImage.save(frame, 'PNG')
  return loadFile(frame)
'''
returns png frame reflection of the given position.
'''
def loadFrameReflection(movie, position):
  """Return the reflection PNG for the frame at position, rendering on demand."""
  position = basename(position)
  frame = framePath('frame', movie, position)
  frameReflection = framePath('frameReflection', movie, position).replace('jpg', 'png')
  if not exists(frameReflection):
    if not exists(frame):
      loadFrame(movie, position)
    if not exists(frame):
      return loadDefaultFrameReflection(movie)
    _writeReflection(frame, frameReflection)
  return loadFile(frameReflection)
def loadTimeline(movie, position):
  '''Return the cached timeline strip for *position*; empty string when
  the bar has not been rendered yet.
  '''
  bar = framePath('timeline', movie, position).replace('jpg', 'png')
  if exists(bar):
    return loadFile(bar)
  # bug fix: removed leftover debug statement 'print bar'
  #FIXME: render and return the timeline bar here instead of giving up
  return ''
def moveCache(old_imdb, new_imdb):
  '''Move all cached artwork (frames, reflections, timelines) from
  old_imdb to new_imdb.  A pre-existing target directory wins; the old
  data is removed either way.  (Refactored: the three copy/pasted
  stanzas collapsed into one loop.)
  '''
  for cache_type in ('frame', 'frameReflection', 'timeline'):
    old = join(cache_root, cache_type, old_imdb)
    new = join(cache_root, cache_type, new_imdb)
    if exists(old) and not exists(new):
      shutil.move(old, new)
    if exists(old):
      shutil.rmtree(old)

174
oxdbarchive/oxdb_import.py Normal file
View File

@ -0,0 +1,174 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
# OXDb Import client, crawls the filesystem and gathers information about
# movies
#
import md5
import os
import sys
import re
import urllib
import time
import simplejson
from scrapeit.utils import read_url
class OXDb:
  '''Thin client for the oxdb archive backend.

  NOTE(review): the _remote_call method used below is not defined in
  this file -- presumably provided elsewhere; confirm before use.
  '''
  def __init__(self, archive):
    self.archive = archive

  def getBase(self):
    # Ask the backend for the archive base path, normalized to end in '/'.
    # bug fix: removed 'self.basePath = self.ar' -- self.ar is never
    # set (AttributeError) and the value was overwritten anyway.
    self.basePath = self._remote_call('base')['base']
    if not self.basePath.endswith('/'):
      self.basePath = self.basePath + '/'
    return self.basePath

  def files(self):
    # Mapping of known files as reported by the backend.
    return self._remote_call('list')['files']

  def addFile(self, params):
    return self._remote_call('add', params)['result']

  def removeFile(self, params):
    return self._remote_call('remove', params)['result']
def oxdb_md5sum(fname):
  '''Return the hex md5 digest of *fname*, or None when the file does
  not exist.

  bug fix: the fallback was assigned to 'msum' while 'md5sum' was
  returned, raising NameError for a missing file.
  '''
  md5sum = None
  if os.path.exists(fname):
    f = file(fname, 'rb')
    m = md5.new()
    while True:
      d = f.read(8096)
      if not d:
        break
      m.update(d)
    md5sum = m.hexdigest()
    f.close()
  return md5sum
# file extensions considered part of the archive
# (movies, audio, subtitles and artwork)
_oxdb_extensions = (
  '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb',
  '.mp3', '.wav',
  '.srt', '.sub', '.idx', '.rar',
  '.jpg', '.png',
)
def _oxdb_file_blacklist(f):
if f.startswith('.'):
return True
return False
def oxdb_spider(archive_base):
  '''Recursively collect archive files below *archive_base*.

  Returns full paths whose extension is in _oxdb_extensions and whose
  name is not blacklisted.  bug fix: os.walk already descends into
  subdirectories; the old per-directory recursion re-walked every
  subtree and duplicated each file many times over.
  '''
  oxdb_files = []
  for root, dirs, files in os.walk(archive_base):
    for f in files:
      if not _oxdb_file_blacklist(f):
        if os.path.splitext(f)[1] in _oxdb_extensions:
          oxdb_files.append(os.path.join(root, f))
  return oxdb_files
_known_oxdb_extensions = ['Interview']
_known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$']
def _in_known_oxdb_extensions(term):
  '''
  used to remove parts that are known to not be part of the title
  '''
  if term in _known_oxdb_extensions:
    return True
  return any(re.compile(pattern, re.IGNORECASE).findall(term)
             for pattern in _known_oxdb_extensions_reg)
def oxdb_title(title):
  '''
  normalize filename to get movie title: drop the extension, then strip
  trailing dot-separated parts that look like metadata
  '''
  parts = title.split('.')[:-1]
  def _is_metadata(part):
    # known tags, "Part N" markers, and 2/4-char parts (year, 'CD', ...)
    return (_in_known_oxdb_extensions(part)
            or part.startswith('Part ')
            or len(part) in (2, 4))
  while len(parts) > 1 and _is_metadata(parts[-1]):
    parts = parts[:-1]
  return ".".join(parts)
def oxdb_import_files(archive):
stats = {'skipped': 0, 'added': 0, 'remove':0}
oxdb_backend = OXDb()
base = oxdb_backend.getBase()
print base
files = oxdb_spider(base)
oxdb_files = oxdb_backend.files()
md5sum_on_disk = []
for f in files:
meta = oxdb_file_stats(f)
f = f.replace(base, '')
if oxdb_files.has_key(f) and oxdb_files[f]['size'] == meta['size']:
stats['skipped'] += 1
md5sum_on_disk.append(oxdb_files[f]['md5sum'])
else:
meta = oxdb_file_metadata(meta)
#remove base
meta['path'] = f.encode('utf-8')
#ignore files changed in the last 5 minutes
if time.time() - meta['date'] > 300:
print oxdb_backend.addFile(meta), f
stats['added'] += 1
else:
print "to hot, skipping for now", f
md5sum_on_disk.append(meta['md5sum'])
for f in oxdb_files:
if oxdb_files[f]['md5sum'] not in md5sum_on_disk:
print "remove", f
oxdb_backend.removeFile({'md5sum':oxdb_files[f]['md5sum']})
stats['remove'] += 1
print stats
return stats
def oxdb_file_stats(fname):
  '''Return path, size (bytes) and mtime for *fname* as a dict.'''
  info = os.stat(fname)
  return {
    'path': fname,
    'size': info.st_size,
    'date': info.st_mtime,
  }
def oxdb_file_metadata(meta):
  '''Fill in placeholder media info plus the md5sum for a stats dict as
  returned by oxdb_file_stats, and return it.

  bug fix: removed the unreachable title/director code that followed
  the return statement (it could never run).
  '''
  meta['video'] = ''
  meta['audio'] = ''
  meta['length'] = 0
  meta['bpp'] = 0
  meta['md5sum'] = oxdb_md5sum(meta['path'])
  #FIXME: use midentify or other to get more info about file
  return meta
# if invoked on the command line, sync the archive given as first argument
if __name__ == '__main__':
  #for fname in sys.argv[1:]:
  #  print oxdb_md5sum(fname), fname
  # bug fix: oxdb_import_files() requires the archive argument;
  # take it from the command line -- TODO confirm expected value
  oxdb_import_files(sys.argv[1] if len(sys.argv) > 1 else None)

210
oxdbarchive/oxdb_utils.py Normal file
View File

@ -0,0 +1,210 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
# OXDb Import client, crawls the filesystem and gathers information about
# movies
#
import md5
import os
import sys
import re
import urllib
from scrapeit.utils import read_url
_oxdb_file_blacklist = ['.DS_Store']
_oxdb_extensions = [
'.avi', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4',
'.srt', '.sub', '.idx'
]
_known_oxdb_extensions = ['Interview']
_known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$']
def _in_known_oxdb_extensions(term):
  '''
  used to remove parts that are known to not be part of the title
  '''
  if term in _known_oxdb_extensions:
    return True
  return any(re.compile(pattern, re.IGNORECASE).findall(term)
             for pattern in _known_oxdb_extensions_reg)
def oxdb_filenameUmlaute(string):
  '''Replace ascii transliterations (ae/oe/ue, upper and lower case)
  with the corresponding german umlauts.
  '''
  result = u"%s" % string
  for plain, umlaut in ((u'ae', u'\xe4'), (u'oe', u'\xf6'), (u'ue', u'\xfc'),
                        (u'Ae', u'\xc4'), (u'Oe', u'\xd6'), (u'Ue', u'\xdc')):
    result = result.replace(plain, umlaut)
  return result
def oxdb_director(director):
  '''Return the director name encoded in a path's parent directory,
  normalizing " & " to ", ".
  '''
  director = os.path.basename(os.path.dirname(director))
  # bug fix: str.replace returns a new string; the result was discarded
  director = director.replace(' & ', ', ')
  return director
def oxdb_title(title):
  '''
  normalize filename to get movie title
  '''
  # protect ". " / " ." so only real extension dots split the name
  protected = os.path.basename(title)
  protected = protected.replace('. ', '_dot__space_').replace(' .', '_space__dot_')
  stem = protected.split('.')[0]
  return stem.replace('_dot__space_', '. ').replace('_space__dot_', ' .')
def oxdb_id(title, director):
  '''Stable oxdb id: '0x' plus the md5 of u"director/title" (utf-8).'''
  key = (u"%s/%s" % (director, title)).encode('utf-8')
  return '0x' + md5.new(key).hexdigest()
def formatNumber(n, sep=','):
  '''
  seperate number with thousand comma
  '''
  digits = str(n)
  chunks = []
  # peel off three digits at a time from the right
  while len(digits) > 3:
    chunks.insert(0, digits[-3:])
    digits = digits[:-3]
  chunks.insert(0, digits)
  return sep.join(chunks)
def oxdb_runtimeformat(runtime):
  '''
  format runtime (seconds) for stats; escalates units step by step
  '''
  if runtime == 0:
    return ''
  if runtime < 60:
    return "%s sec" % runtime
  minutes, seconds = int(runtime / 60), runtime % 60
  # up to 900 minutes the value stays in minutes
  if minutes < 900:
    return "%s min" % minutes
  hours, minutes = int(minutes / 60), minutes % 60
  if hours < 24:
    return "%s hours %s minutes %s seconds" % (hours, minutes, seconds)
  days, hours = int(hours / 24), hours % 24
  if days < 365:
    return "%s days %s hours %s minutes %s seconds" % (days, hours, minutes, seconds)
  years, days = int(days / 365), days % 365
  return "%s years %s days %s hours %s minutes %s seconds" % (years, days, hours, minutes, seconds)
def oxdb_lengthformat(mseconds):
  """
  Format mseconds in a nice way: HH:MM:SS, with a leading day field
  once the length exceeds 24 hours.
  """
  seconds = mseconds/1000
  minutes, seconds = int(seconds / 60), seconds % 60
  hours, minutes = int(minutes / 60), minutes % 60
  if hours <= 24:
    return "%02d:%02d:%02d" % (hours, minutes, seconds)
  days, hours = int(hours / 24), hours % 24
  return "%d:%02d:%02d:%02d" % (days, hours, minutes, seconds)
"""
Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102
bytes, etc).
number - number to format.
long_name - long name. i.e. byte
short - short name, i.e. B
"""
def oxdb_format(number, long_name, short):
if not number:
return "0 %ss" % long_name
number = float(number)
if number < 1024:
return "%d %s%s" % (number, long_name, number != 1 and 's' or '')
if number < 1024 * 1024:
return "%d K%s" % ((number / 1024), short)
if number < 1024 * 1024 * 1024:
return "%.1f M%s" % (number / (1024 * 1024), short)
if number < 1024 * 1024 * 1024 * 1024:
return "%.2f G%s" % (number / (1024 * 1024 * 1024), short)
return "%.3f T%s" % (number / (1024 * 1024 * 1024 * 1024), short)
# convenience wrappers around oxdb_format for the common units
def oxdb_filesizeformat(number):
  # bytes, e.g. '2 KB'
  return oxdb_format(number, 'byte', 'B')
def oxdb_bitformat(number):
  # bits, e.g. '2 Kb'
  return oxdb_format(number, 'bit', 'b')
def oxdb_pixelformat(number):
  # pixels, e.g. '2 Kpx'
  return oxdb_format(number, 'pixel', 'px')
from htmlentitydefs import name2codepoint
# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def htmldecode(text):
  """Decode HTML entities in the given text."""
  # for byte strings keep values <= 255 as plain chars (python2 code)
  if type(text) is unicode:
    to_char = unichr
  else:
    to_char = lambda cp: unichr(cp) if cp > 255 else chr(cp)
  def _decode(match):
    entity = match.group(1)
    if entity.startswith('#x'):
      return to_char(int(entity[2:], 16))
    if entity.startswith('#'):
      return to_char(int(entity[1:]))
    if entity in name2codepoint:
      return to_char(name2codepoint[entity])
    # unknown entity: leave it untouched
    return match.group(0)
  return charrefpat.sub(_decode, text)
def highlight(text, query):
  '''Wrap every case-insensitive occurrence of *query* in *text* with a
  highlight span.
  '''
  if not query:
    return text
  found = re.compile("(%s)" % re.escape(query), re.IGNORECASE).findall(text)
  for occurrence in found:
    text = re.sub("(%s)" % re.escape(occurrence),
                  '<span class="textHighlight">\\1</span>', text)
  return text
def imdb2oxdb(imdb):
  '''Convert a numeric imdb id into the '0x'-prefixed oxdb form; ids
  already in oxdb form pass through unchanged.
  '''
  if imdb.startswith(('0x', 'ox')):
    return imdb
  return "0x%06X" % int(imdb)
def oxdb2imdb(oxdb):
  '''Convert an 8-char oxdb id ('0x' + 6 hex digits, as produced by
  imdb2oxdb) back to a 7-digit imdb id; anything else passes through.
  '''
  if len(oxdb) == 8:
    # bug fix: float() cannot parse hex strings like '0x00007B';
    # parse base-16 instead (inverse of imdb2oxdb)
    return "%07d" % int(oxdb, 16)
  return oxdb
def trimString(string, length):
  '''Shorten *string* to at most *length* characters by eliding the
  middle with '...'.
  '''
  if len(string) <= length:
    return string
  return string[:length - 13] + '...' + string[-10:]
# two-letter language codes known to oxdb
languages = ('be', 'bg', 'ba', 'wo', 'bn', 'bo', 'bh', 'bi', 'ji', 'br', 'ja',
'ru', 'rw', 'tl', 'rm', 'rn', 'ro', 'gu', 'jw', 'gd', 'ga', 'sv', 'gn', 'gl',
'om', 'tn', 'fa', 'oc', 'ss', 'or', 'hy', 'hr', 'sw', 'hu', 'hi', 'su', 'ha',
'ps', 'pt', 'sk', 'pa', 'pl', 'el', 'eo', 'en', 'zh', 'sm', 'eu', 'et', 'sa',
'es', 'mg', 'uz', 'ml', 'mo', 'mn', 'mi', 'as', 'mk', 'ur', 'mt', 'ms', 'mr',
'my', 'fr', 'fy', 'ia', 'zu', 'fi', 'fj', 'fo', 'nl', 'no', 'na', 'ne', 'xh',
'co', 'ca', 'cy', 'cs', 'ka', 'kk', 'sr', 'sq', 'ko', 'kn', 'km', 'kl', 'ks',
'si', 'sh', 'so', 'sn', 'ku', 'sl', 'ky', 'sg', 'sd', 'yo', 'de', 'da', 'dz',
'la', 'ln', 'lo', 'tt', 'tr', 'ts', 'lv', 'to', 'lt', 'tk', 'th', 'ti', 'tg',
'te', 'ta', 'aa', 'ab', 'uk', 'af', 'vi', 'is', 'am', 'it', 'iw', 'vo', 'ik',
'ar', 'in', 'ay', 'az', 'ie', 'qu', 'st', 'tw')

14
oxdbarchive/release.py Normal file
View File

@ -0,0 +1,14 @@
# Release information about oxdbarchive
version = "1.0"
# description = "Your plan to rule the world"
# long_description = "More description about your plan"
# author = "Your Name Here"
# email = "YourEmail@YourDomain"
# copyright = "Vintage 2006 - a good year indeed"
# if it's open source, you might want to specify these
# url = "http://yourcool.site/"
# download_url = "http://yourcool.site/download"
# license = "MIT"

278
oxdbarchive/subtitles.py Normal file
View File

@ -0,0 +1,278 @@
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import os
from os.path import abspath, join, dirname
import shutil
import chardet
img_extension = 'jpg'
def srt2txt(srt, encoding = "latin-1"):
  '''Return only the text lines of an srt document, one block per
  subtitle, in playback order.
  '''
  subtitles = srt2dict(srt, encoding)
  ordered = sorted(int(num) for num in subtitles.keys())
  blocks = ["%s\n\n" % subtitles["%s" % num]['text'] for num in ordered]
  return ''.join(blocks).strip()
def srt2dict(srt, encoding = "latin-1"):
  '''convert srt string into a dict in the form
     dict(num = dict(start, stop, text))
  '''
  subdict = {}
  blocks = srt.replace('\r', '').strip().split('\n\n')
  for block in blocks:
    if not block.strip():
      continue
    lines = block.strip().split('\n')
    # need at least index, timing line and one text line
    if len(lines) > 2:
      timing = lines[1].split(' --> ')
      num = u"%s" % int(lines[0])
      subdict[num] = {
        'start': timing[0],
        'stop': timing[1],
        'text': u'\n'.join(lines[2:]),
      }
  return subdict
def dict2srt(subtitles, encoding = "latin-1"):
  '''convert dict in the form dict(num = dict(start, stop, text))
  into an srt file
  '''
  blocks = []
  for num in sorted(int(k) for k in subtitles.keys()):
    entry = subtitles["%s" % num]
    blocks.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
      num, entry['start'], entry['stop'], entry['text']))
  return "".join(blocks).strip().encode(encoding)
def time_str2msec(time_string):
  '''Convert "HH:MM:SS[,mmm]" into milliseconds.'''
  import time
  parts = time_string.split(',')
  msec = float("0." + parts[-1]) if len(parts) > 1 else 0.0
  # anchor both timestamps to the same date so mktime offsets cancel
  hms = "2007 " + parts[0]
  offset = time.mktime(time.strptime(hms, "%Y %H:%M:%S")) + msec
  base = time.mktime(time.strptime("2007 00:00:00", "%Y %H:%M:%S"))
  return int((offset - base) * 1000)
def msec2time_str(msec):
  '''Convert milliseconds into "HH:MM:SS,mmm" (inverse of
  time_str2msec for values below 24h).
  '''
  import time
  ms_suffix = ",%s" % ("%s" % msec)[-3:]
  seconds = float(msec) / 1000
  return time.strftime("%H:%M:%S", time.gmtime(seconds)) + ms_suffix
def shift_time(offset, time_string):
  ''' return time shifted by offset milliseconds
      format of time is expected to be 01:50:52,123
  '''
  return msec2time_str(time_str2msec(time_string) + offset)
def shift_subtitles(offset, offset_num, subtitles):
  '''
  shift every subtitle by *offset* msec and renumber the keys by
  *offset_num*; the entry dicts are updated in place and re-keyed
  '''
  shifted = {}
  for num in sorted(int(k) for k in subtitles.keys()):
    entry = subtitles["%s" % num]
    entry['start'] = shift_time(offset, entry['start'])
    entry['stop'] = shift_time(offset, entry['stop'])
    shifted["%s" % (num + offset_num)] = entry
  return shifted
def merge_subtitles(subtitles):
  '''
  converts a list of subtitles / dict(txt, length)
  into one srt subtitle
  '''
  merged = {}
  offset = 0
  for key in sorted(subtitles.keys()):
    part = srt2dict(subtitles[key]['txt'])
    if offset:
      # shift each following part behind the accumulated length
      part = shift_subtitles(offset, len(merged), part)
    merged.update(part)
    offset += subtitles[key]['length']
  return dict2srt(merged)
def split_subtitle(subtitles, offset):
  '''
  split subtitles at offset

  NOTE(review): this function looks unfinished/broken -- confirm the
  intent before using it:
   - 'time' is not imported in this module (NameError at call time)
   - 'subtitles['stop']' presumably should be subtitles["%s" % k]['stop']
   - 'subtitle[k]' references an undefined name (presumably 'subtitles')
   - nothing is returned; 'one' and 'two' are discarded
  '''
  offset_time = time.strftime("%H:%M:%S", offset)
  one = {}
  two = {}
  for k in sorted([int(k) for k in subtitles.keys()]):
    if subtitles['stop'] < offset_time:
      one[k] = subtitle[k]
    else:
      two[k] = subtitle[k]
  two = shift_subtitles(-offset, -len(two), two)
def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
  '''Extract the range [inpoint, outpoint] from movie_file into
  flash_file via the external tools/extract_clip.py script.
  offset (msec) shifts both cut points back into this file's timeline;
  width/height are accepted but unused here.
  '''
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt'):
    print "this is not a movie file, will not try to extract frames"
    return
  if offset:
    print "Inpoint ", inpoint,
    inpoint = shift_time(-offset, inpoint)
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
  duration = time_str2msec(outpoint) - time_str2msec(inpoint)
  inpoint = time_str2msec(inpoint)
  extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
  # NOTE(review): file names are interpolated into a shell command with
  # plain double quotes -- names containing quotes break/inject the shell
  cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
  os.system(cmd.encode('utf-8'))
def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
  '''Cut [inpoint, outpoint] out of movie_file and encode it to
  flash_file: mencoder copies the clip to a temp avi, ffmpeg then
  transcodes it.  offset (msec) shifts both cut points back into this
  file's timeline; width/height are accepted but unused here.
  '''
  import warnings
  warnings.filterwarnings("ignore", "tempnam")
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt', 'mkv'):
    print "this is not a movie file, will not try to extract frames"
    return
  # work inside a throw-away directory (os.tempnam is insecure, hence
  # the warning filter above)
  framedir = os.tempnam()
  os.mkdir(framedir)
  os.chdir(framedir)
  if offset:
    print "Inpoint ", inpoint,
    inpoint = shift_time(-offset, inpoint)
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
  # clip length in seconds, padded by one second
  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
  audiorate = "44100"
  if os.path.exists(movie_file):
    # NOTE(review): file names are interpolated into shell commands with
    # single quotes only -- a name containing "'" breaks/injects the shell
    mencoder_options = ''
    mencoder_options += " '%s'" % movie_file
    mencoder_options += " -ss '%s' -endpos %0.2f" % (inpoint, outpoint)
    mencoder_options += ' -ovc copy -oac copy -o tempfile.avi '
    mencoder = "mencoder %s >/dev/null 2>&1" % mencoder_options
    #print mencoder.encode('utf-8')
    os.system(mencoder.encode('utf-8'))
    ffmpeg_options = ''
    #ffmpeg_options += " -ss '%s' -t %0.2f" % (inpoint, outpoint)
    ffmpeg_options += " -y -i 'tempfile.avi'"
    ffmpeg_options += " -ar %s -b 128000 '%s'" % (audiorate, flash_file)
    ffmpeg = "ffmpeg %s >/dev/null 2>&1" % ffmpeg_options
    #print ffmpeg.encode('utf-8')
    os.system(ffmpeg.encode('utf-8'))
  else:
    print "update the cache %s missing" % movie_file.encode('utf-8')
  shutil.rmtree(framedir)
def extract_frame(movie_file, timestamp, img_folder, width=128, offset = 0, redo = False):
  '''Grab a single jpeg frame at *timestamp* from movie_file into
  img_folder via mplayer.  offset (msec) maps the public timestamp to
  the position inside this particular file; redo forces re-extraction.
  '''
  import warnings
  warnings.filterwarnings("ignore", "tempnam")
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt'):
    print "this is not a movie file, will not try to extract frames"
    return
  # mplayer writes its frames into a throw-away working directory
  framedir = os.tempnam()
  os.mkdir(framedir)
  os.chdir(framedir)
  if offset:
    timestamp_in_file = shift_time(-offset, timestamp)
  else:
    timestamp_in_file = timestamp
  if os.path.exists(movie_file):
    # NOTE(review): movie_file is interpolated into a shell command with
    # single quotes only -- a name containing "'" breaks the shell
    mplayer_options = ''
    mplayer_options += " '%s'" % movie_file
    mplayer_options += " -ss '%s' -frames 2" % (timestamp_in_file)
    mplayer_options += " -vo jpeg:quality=90 -vf scale -zoom -xy %d " % width
    mplayer_options += " -ao null"
    mplayer = "mplayer %s >/dev/null 2>&1" % mplayer_options
    # the cached file keeps the *public* timestamp, with ':' -> '.'
    frame = os.path.join(img_folder, "%s.%s" % (timestamp.replace(':', '.'), img_extension))
    if redo or not os.path.exists(frame):
      print mplayer.encode('utf-8')
      os.system (mplayer.encode('utf-8'))
      files = os.listdir(framedir)
      if files:
        print "creating frame ", frame
        # keep the last frame mplayer produced, drop the rest
        shutil.move(os.path.join(framedir,files[-1]), frame)
        if len(files)>1:
          # NOTE(review): files[:-2] leaves files[-2] behind;
          # presumably should be files[:-1] -- confirm
          for f in files[:-2]:
            print "unlink", f
            os.unlink(f)
  else:
    print "update the cache %s missing" % movie_file
  shutil.rmtree(framedir)
def extract_subtitles(movie_file, srt, img_folder, width=128, offset = 0, redo = False):
  '''Extract one frame per subtitle (at its start time) into img_folder.'''
  subtitles = srt2dict(srt)
  for num in sorted(int(k) for k in subtitles.keys()):
    start = subtitles["%s" % num]['start']
    extract_frame(movie_file, start, img_folder, width, offset, redo)
def detectEncoding(fp):
  '''Guess the text encoding of the open file *fp*: first by BOM
  sniffing, then via chardet; the original file position is restored
  before returning.
  NOTE(review): python2 code -- fp.read(4) must return a byte str for
  map(ord, ...); also assumes the file holds at least 4 bytes.
  '''
  bomDict={ # bytepattern : name
    (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",
    (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
    (0xFE, 0xFF, None, None) : "utf_16_be",
    (0xFF, 0xFE, None, None) : "utf_16_le",
    (0xEF, 0xBB, 0xBF, None) : "utf_8",
  }
  # go to beginning of file and get the first 4 bytes
  oldFP = fp.tell()
  fp.seek(0)
  (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4)))
  # try bom detection using 4 bytes, 3 bytes, or 2 bytes
  bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
  if not bomDetection :
    bomDetection = bomDict.get((byte1, byte2, byte3, None))
  if not bomDetection :
    bomDetection = bomDict.get((byte1, byte2, None, None))
  ## if BOM detected, we're done :-)
  fp.seek(oldFP)
  if bomDetection :
    return bomDetection
  encoding = 'latin-1'
  #more character detecting magick using http://chardet.feedparser.org/
  fp.seek(0)
  rawdata = fp.read()
  encoding = chardet.detect(rawdata)['encoding']
  fp.seek(oldFP)
  return encoding
def loadSrt(fname):
f = open(fname)
encoding = detectEncoding(f)
data = f.read()
f.close()
try:
udata = unicode(data, encoding)
except:
try:
udata = unicode(data, 'latin-1')
except:
print "failed to detect encoding, giving up"
udate = u''
if udata.startswith(u'\ufeff'):
udata = udata[1:]
return udata

View File

View File

View File

@ -0,0 +1,32 @@
import unittest
import turbogears
from turbogears import testutil
from oxdbarchive.controllers import Root
import cherrypy
cherrypy.root = Root()
class TestPages(unittest.TestCase):
  '''Request-level smoke tests against the TurboGears Root controller.'''
  def setUp(self):
    # start the TurboGears runtime before each test
    turbogears.startup.startTurboGears()
  def tearDown(self):
    """Tests for apps using identity need to stop CP/TG after each test to
    stop the VisitManager thread.
    See http://trac.turbogears.org/turbogears/ticket/1217 for details.
    """
    turbogears.startup.stopTurboGears()
  def test_method(self):
    "the index method should return a string called now"
    import types
    result = testutil.call(cherrypy.root.index)
    assert type(result["now"]) == types.StringType
  def test_indextitle(self):
    "The indexpage should have the right title"
    testutil.createRequest("/")
    response = cherrypy.response.body[0].lower()
    assert "<title>welcome to turbogears</title>" in response

View File

@ -0,0 +1,23 @@
# If your project uses a database, you can set up database tests
# similar to what you see below. Be sure to set the db_uri to
# an appropriate uri for your testing database. sqlite is a good
# choice for testing, because you can use an in-memory database
# which is very fast.
from turbogears import testutil, database
# from oxdbarchive.model import YourDataClass, User
# database.set_db_uri("sqlite:///:memory:")
# class TestUser(testutil.DBTest):
# def get_model(self):
# return User
#
# def test_creation(self):
# "Object creation should set the name"
# obj = User(user_name = "creosote",
# email_address = "spam@python.not",
# display_name = "Mr Creosote",
# password = "Wafer-thin Mint")
# assert obj.display_name == "Mr Creosote"

110
oxdbarchive/timeline.py Normal file
View File

@ -0,0 +1,110 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import Image
import math
from StringIO import StringIO
import oxdb_cache
from subtitles import srt2dict, time_str2msec
# width of one timeline strip in px
lineWidth = 600
# height of one strip ('timlelineHeight' [sic] -- misspelled name kept,
# the functions below reference it)
timlelineHeight = 16
# strip height plus 4px padding above and below
rowHeight = timlelineHeight + 2 * 4
'''
returns timeline view as a png image for a given movie.
'''
def loadTimeline(movie, lines = -1):
  '''Render the timeline view for *movie* as PNG bytes, stacking up to
  *lines* strips of lineWidth px each; lines=-1 renders all of them.
  '''
  length = int(movie.length / 1000)   # movie length in seconds
  l = int(math.ceil(length / lineWidth) + 1)
  if lines == -1 or l < lines:
    lines = l
  size = (lineWidth, rowHeight * lines)
  timelineColor = (64, 64, 64)
  i = Image.new("RGBA", size)
  for currentLine in range(0, lines):
    offset = currentLine * rowHeight + 4
    try:
      # cache key advances in steps of 10 per line -- TODO confirm unit
      data = oxdb_cache.loadTimeline(movie, "%02d" % (currentLine * 10))
      f = StringIO(data)
      t = Image.open(f)
      t = t.convert('RGBA')
      box = (0, offset , t.size[0], offset + t.size[1])
      i.paste(t, box)
    except:
      # NOTE(review): bare except -- any cache failure falls back to
      # painting a plain dark placeholder bar for this line
      width = lineWidth
      if currentLine == lines -1:
        width = length - (lines - 1) * lineWidth
      box = ((0, offset , width, offset + timlelineHeight))
      i.paste(timelineColor, box)
  f = StringIO()
  i.save(f, 'PNG')
  return f.getvalue()
'''
returns timeline overlay as a png image for a given movie
query is used to only highlight scenes matching query
'''
def loadTimelineOverlay(movie, query, lines = -1):
  '''Render a transparent PNG the size of the timeline with yellow
  markers (border + fill) over every subtitle matching *query*.
  '''
  background = (255,255,255,0)
  marker = (255,255,0,128)
  markerBorder = (255,255,0,255)
  length = int(movie.length / 1000)
  l = int(math.ceil(length / lineWidth) + 1)
  if lines == -1 or l < lines:
    lines = l
  size = (lineWidth, rowHeight * lines)
  mask = Image.new("RGBA", size, background)
  for subtitle in movie.overlay(query):
    start = int(round(time_str2msec(subtitle.start) / 1000))
    stop = int(round(time_str2msec(subtitle.stop) / 1000))
    if start < stop:
      currentLine = math.ceil(start / lineWidth)
      if currentLine <= l:
        offset = currentLine * rowHeight + 4
        # map absolute seconds onto this line's x range
        start = start - ((currentLine) * lineWidth)
        stop = stop - ((currentLine) * lineWidth)
        box = ((start, offset -1, stop, offset + timlelineHeight + 1))
        mask.paste(marker, box)
        borderBox = ((start, offset -1, stop, offset))
        mask.paste(markerBorder, borderBox)
        borderBox = ((start, offset + timlelineHeight, stop, offset + timlelineHeight +1))
        mask.paste(markerBorder, borderBox)
  f = StringIO()
  mask.save(f, 'PNG', quality=70)
  # bug fix: removed the unreachable trailing
  # 'return Image.composite(image, overlay, mask)' -- it followed a
  # return and referenced undefined names 'image' and 'overlay'
  return f.getvalue()
'''
returns an image map marking all the scenes with mouse events
for a given movie.
'''
def loadTimelineImageMap(movie):
  subtitles = movie.subtitleDict
  length = int(movie.length / 1000)
  parts = ['<map name="timelineImageMap">']
  for key in sorted(int(k) for k in subtitles.keys()):
    sub = subtitles["%s" % key]
    start = int(round(time_str2msec(sub['start']) / 1000))
    stop = int(round(time_str2msec(sub['stop']) / 1000))
    if start >= stop:
      continue
    currentLine = math.ceil(start / lineWidth)
    offset = int(currentLine * rowHeight + 4)
    # map absolute seconds onto this line's x range
    start = int(start - currentLine * lineWidth)
    stop = int(stop - currentLine * lineWidth)
    box = (start, offset - 1, stop, offset + timlelineHeight + 1)
    parts.append('<area class="timelineMarker" shape="rect" coords="%s, %s, %s, %s"' % box)
    parts.append(' onMouseOver="iS(%s, %s, %s, %s)" ' % (start, stop, offset, key))
    parts.append(' onClick="cS(%s, %s, %s, %s)" ' % (start, stop, offset, key))
    parts.append(' onMouseOut="oS()" />')
  parts.append("</map>")
  return "".join(parts)

View File

@ -0,0 +1,392 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
#
import os
import time
import gobject
gobject.threads_init()
import pygst
pygst.require("0.10")
import gst
import Image
import singledecodebin
class FirstFrame:
  '''Grab a single frame from a video file as a scaled PNG using a
  GStreamer decode pipeline (decodebin -> fakesink with handoffs).
  '''
  getShot = False   # True while a snapshot handoff is pending
  length = 0        # stream duration (gst time units), 0 if unknown
  height = 0        # output height computed by scaleto()

  def __init__(self, videofile, png='', nseconds=-1, width=128):
    # Build a paused pipeline around *videofile* and query its duration;
    # when png and nseconds are given, write the snapshot right away.
    s = ''' filesrc name=input
      ! decodebin name=dbin
      ! queue name =q
      ! ffmpegcolorspace ! video/x-raw-rgb
      ! fakesink name=output signal-handoffs=true
    '''
    self.width = width
    self.pipeline = gst.parse_launch(s)
    self.input = self.pipeline.get_by_name('input')
    self.fakesink = self.pipeline.get_by_name('output')
    self.dbin = self.pipeline.get_by_name('dbin')
    self.bus = self.pipeline.get_bus()
    self.input.props.location = videofile
    self.pipeline.set_state(gst.STATE_PAUSED)
    self.pipeline.get_state()
    #length
    queue = self.pipeline.get_by_name('q')
    pads = queue.sink_pads()
    q = gst.query_new_duration(gst.FORMAT_TIME)
    for pad in pads:
      if pad.get_peer() and pad.get_peer().query(q):
        format, self.length = q.parse_duration()
    if nseconds>-1 and png:
      self.png(png, nseconds)

  def close(self):
    # Tear the pipeline down.
    self.pipeline.set_state(gst.STATE_NULL)
    self.pipeline.get_state()

  def seek(self, nseconds):
    # Flushing, accurate seek; falls back to 0 when past the known end.
    if(self.length and self.length < nseconds):
      nseconds = 0
    event = gst.event_new_seek(1.0, gst.FORMAT_TIME,
        gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
        gst.SEEK_TYPE_SET, nseconds,
        gst.SEEK_TYPE_NONE, 0)
    res = self.fakesink.send_event(event)
    if res:
      self.pipeline.set_new_stream_time(0L)
    else:
      gst.error("seek to %r failed" % nseconds)

  def png(self, png, nseconds):
    # Seek to *nseconds* and write the next decoded frame to *png*.
    self.png_frame = png
    self.pipeline.set_state(gst.STATE_PAUSED)
    self.pipeline.get_state()
    self.seek(nseconds)
    self.pipeline.set_state(gst.STATE_PLAYING)
    self.pipeline.get_state()
    ho = self.fakesink.connect("handoff", self.snapshot_png_handoff_cb)
    self.getShot = True
    # poll the bus until the handoff callback clears getShot
    while self.getShot:
      msg = self.bus.poll(gst.MESSAGE_ANY, gst.SECOND)
      if not msg:
        break
    self.fakesink.disconnect(ho)

  def snapshot_png_handoff_cb(self, sink, buffer, pad):
    # Fakesink handoff: convert the raw RGB buffer to a scaled PNG once.
    if self.getShot:
      caps = sink.sink_pads().next().get_negotiated_caps()
      for s in caps:
        input_d = (s['width'], s['height'])
        output_d = self.scaleto(s['width'], s['height'])
        img = Image.fromstring('RGB',input_d,buffer)
        img = img.resize(output_d, Image.ANTIALIAS)
        img.save(self.png_frame)
        self.getShot=False

  def scaleto(self, width, height):
    # Fit to self.width keeping aspect ratio; height rounded to even.
    height = int(self.width / (float(width) / height))
    height = height - height % 2
    self.height = height
    return (self.width, height)
def file_gnl_src(name, uri, caps, start, duration, offset, priority):
  '''Wrap *uri* in a gnlsource for use inside a gnlcomposition: the
  media range [offset, offset+duration] plays at timeline position
  *start* with the given priority.
  '''
  src = singledecodebin.SingleDecodeBin(caps, uri)
  gnlsrc = gst.element_factory_make('gnlsource', name)
  gnlsrc.props.start = start
  gnlsrc.props.duration = duration
  gnlsrc.props.media_start = offset
  gnlsrc.props.media_duration = duration
  gnlsrc.props.priority = priority
  gnlsrc.add(src)
  return gnlsrc
class VideoEncoder:
  '''GStreamer bin: queue -> videorate -> identity -> ffmpegcolorspace
  -> videoscale -> encoder -> queue, encoding to flv (ffenc_flv),
  mp4 (x264enc) or ogg/theora (default).
  '''
  def __init__(self, output= 'ogg', width = 0, height = 0):
    self.bin = gst.Bin('VideoEncoder')
    self._queue_in = gst.element_factory_make("queue")
    self._queue_in.set_property("max-size-buffers",500)
    self._queue_in.set_property("max-size-time",0)
    self._queue_in.set_property("max-size-bytes",0)
    self._queue_out = gst.element_factory_make("queue")
    self._queue_out.set_property("max-size-buffers",500)
    self._identity = gst.element_factory_make("identity")
    self._identity.set_property('single-segment', True)
    self._ffmpegcolorspace = gst.element_factory_make("ffmpegcolorspace")
    self._videorate = gst.element_factory_make("videorate")
    self._videoscale = gst.element_factory_make("videoscale")
    self._videoscale.props.method = 1
    # pick the encoder element for the requested container format
    if output == 'flv':
      self.encoder = gst.element_factory_make("ffenc_flv")
      self.encoder.set_property('bitrate', 200000)
    elif output == 'mp4':
      self.encoder = gst.element_factory_make("x264enc")
      self.encoder.set_property('bitrate', 200)
    else:
      self.encoder = gst.element_factory_make("theoraenc")
      self.encoder.set_property("bitrate", 2000)
      self.encoder.set_property('sharpness', 1)
    self.bin.add(
      self._queue_in,
      self._identity,
      self._ffmpegcolorspace,
      self._videorate,
      self._videoscale,
      self.encoder,
      self._queue_out
      )
    self._queue_in.link(self._videorate)
    # force a fixed output framerate between videorate and identity
    fps = "25/1"
    caps = "video/x-raw-yuv, framerate=(fraction)%s" % fps
    gst_caps = gst.caps_from_string(caps)
    self._videorate.link(self._identity, gst_caps)
    self._identity.link(self._ffmpegcolorspace)
    self._ffmpegcolorspace.link(self._videoscale)
    if width:
      # constrain the scaler output to the requested geometry
      caps = "video/x-raw-yuv, width=%d, " % width
      if height:
        caps += "height=%d," % height
      caps += "pixel-aspect-ratio=(fraction)1/1"
      gst_caps = gst.caps_from_string(caps)
      self._videoscale.link(self.encoder, gst_caps)
    else:
      self._videoscale.link(self.encoder)
    self.encoder.link(self._queue_out)
    # Create GhostPads
    self.bin.add_pad(gst.GhostPad('sink', self._queue_in.get_pad('sink')))
    self.bin.add_pad(gst.GhostPad('src', self._queue_out.get_pad('src')))
class AudioEncoder:
  '''GStreamer bin: queue -> audiorate -> identity -> audioconvert ->
  audioresample -> encoder -> queue, encoding to mp3 (flv output),
  aac (mp4) or vorbis (default).
  '''
  def __init__(self, output = 'ogg', samplerate = 44100, channels = 2):
    self.bin = gst.Bin('AudioEncoder')
    self._queue_in = gst.element_factory_make("queue")
    self._queue_in.set_property("max-size-buffers",500)
    self._queue_in.set_property("max-size-time",0)
    self._queue_in.set_property("max-size-bytes",0)
    self._queue_out = gst.element_factory_make("queue")
    self._queue_out.set_property("max-size-buffers",500)
    self._audiorate = gst.element_factory_make("audiorate")
    self._identity = gst.element_factory_make("identity")
    self._identity.set_property('single-segment', True)
    self._audioconvert = gst.element_factory_make("audioconvert")
    self._audioresample = gst.element_factory_make("audioresample")
    # pick the encoder element (and its input caps) per output format
    if output == 'flv':
      #MP3
      self.encoder = gst.element_factory_make("lame")
      self.encoder.set_property("bitrate", 32)
      self._mp3parse = gst.element_factory_make("mp3parse")
      self.bin.add(self._mp3parse)
      caps = "audio/x-raw-int,rate=%d,channels=%d" % (samplerate, channels)
    elif output == 'mp4':
      #AAC
      self.encoder = gst.element_factory_make("faac")
      #self.encoder.set_property("bitrate", 32)
      caps = "audio/x-raw-int,rate=%d,channels=%d" % (samplerate, channels)
    else:
      #Vorbis
      self.encoder = gst.element_factory_make("vorbisenc")
      self.encoder.set_property("quality", 0)
      caps = "audio/x-raw-float,rate=%d,channels=%d" % (samplerate, channels)
    self.bin.add(
      self._queue_in,
      self._audiorate,
      self._identity,
      self._audioconvert,
      self._audioresample,
      self.encoder,
      self._queue_out
      )
    self._queue_in.link(self._audiorate)
    self._audiorate.link(self._identity)
    self._identity.link(self._audioconvert)
    self._audioconvert.link(self._audioresample)
    # resample/convert to the rate and channel count the encoder expects
    arate = gst.caps_from_string(caps)
    self._audioresample.link(self.encoder, arate)
    if output == 'flv':
      self.encoder.link(self._mp3parse)
      self._mp3parse.link(self._queue_out)
    else:
      self.encoder.link(self._queue_out)
    # Create GhostPads
    self.bin.add_pad(gst.GhostPad('sink', self._queue_in.get_pad('sink')))
    self.bin.add_pad(gst.GhostPad('src', self._queue_out.get_pad('src')))
class ExtractClip:
  """Extract the clip [offset, offset+duration] from a video file and
  re-encode it into outfile.

  The container/codecs are chosen from the outfile extension
  ('.flv' -> flv, '.mp4' -> mp4, anything else -> ogg).  Audio and video
  are cut with two gnlcomposition timelines whose pads are linked to
  AudioEncoder/VideoEncoder bins when they appear, then muxed to disk.
  """
  # raw caps accepted from the decoded source streams
  acaps = gst.caps_from_string("audio/x-raw-int;audio/x-raw-float")
  vcaps = gst.caps_from_string("video/x-raw-rgb; video/x-raw-yuv")
  # class-level defaults, overwritten per instance in __init__
  width = 0
  samplerate = 44100
  channels = 2
  def __init__(self, location, outfile, offset, duration, width, height, samplerate, channels):
    self.mainloop = gobject.MainLoop()
    # pick output format from the target file extension
    if outfile.endswith('flv'):
      self.outputformat = 'flv'
    elif outfile.endswith('mp4'):
      self.outputformat = 'mp4'
    else:
      self.outputformat = 'ogg'
    self.width = width
    self.height = height
    self.samplerate = samplerate
    self.channels = channels
    self._pipeline = gst.Pipeline()
    self.bus = self._pipeline.get_bus()
    self.bus.add_signal_watch()
    self.watch_id = self.bus.connect("message", self.onBusMessage)
    # one gnl composition per stream; their pads appear dynamically
    self.atimeline = gst.element_factory_make("gnlcomposition", "ATimeline")
    self.atimeline.connect("pad-added", self._on_new_apad)
    self._pipeline.add(self.atimeline)
    self.vtimeline = gst.element_factory_make("gnlcomposition", "VTimeline")
    self.vtimeline.connect("pad-added", self._on_new_vpad)
    self._pipeline.add(self.vtimeline)
    #Mux streams
    if self.outputformat == 'flv':
      self._mux = gst.element_factory_make("ffmux_flv")
    elif self.outputformat == 'mp4':
      self._mux = gst.element_factory_make("ffmux_mp4")
    else:
      self._mux = gst.element_factory_make("oggmux")
    self._pipeline.add(self._mux)
    location = "file://" + os.path.abspath(location)
    self._filesink = gst.element_factory_make("filesink")
    self._filesink.set_property("location", outfile)
    self._pipeline.add(self._filesink)
    self._mux.link(self._filesink)
    # media_* address the source file; start is the position in the output
    media_start = offset
    media_duration = duration
    start = 0 * gst.SECOND
    name = os.path.basename(location)
    #video source
    '''
    vsrc = gst.element_factory_make("gnlfilesource", "%s_v" % name)
    vsrc.props.location = location
    vsrc.props.caps = self.vcaps
    vsrc.props.media_start = media_start
    vsrc.props.media_duration = media_duration + gst.SECOND / 50
    vsrc.props.start = start
    vsrc.props.duration = media_duration
    '''
    # file_gnl_src is a helper defined earlier in this file; the video
    # source is padded by one frame (1/25 s) beyond the audio duration
    vsrc = file_gnl_src("%s_v" % name,
        location, self.vcaps, start,
        media_duration + gst.SECOND / 25, media_start, 0)
    self.vtimeline.add(vsrc)
    #audio source
    '''
    asrc = gst.element_factory_make("gnlfilesource", "%s_a" % name)
    asrc.props.location = location
    asrc.props.caps = self.acaps
    asrc.props.media_start = media_start
    asrc.props.media_duration = media_duration
    asrc.props.start = start
    asrc.props.duration = media_duration
    '''
    asrc = file_gnl_src("%s_a" % name,
        location, self.acaps, start,
        media_duration, media_start, 0)
    self.atimeline.add(asrc)
    self.endpos = media_start + media_duration
    # encoder bins (VideoEncoder defined earlier in this file)
    self.vencoder = VideoEncoder(self.outputformat, self.width, self.height)
    self._pipeline.add(self.vencoder.bin)
    self.vencoder.bin.link(self._mux)
    self.aencoder = AudioEncoder(self.outputformat, self.samplerate, self.channels)
    self._pipeline.add(self.aencoder.bin)
    self.aencoder.bin.link(self._mux)
  def extract(self):
    """Run the pipeline until EOS; blocks in the gobject main loop."""
    # pause first and wait for preroll, then play
    self._pipeline.set_state(gst.STATE_PAUSED)
    self._pipeline.get_state()
    self._pipeline.set_state(gst.STATE_PLAYING)
    self._pipeline.get_state()
    self.mainloop.run()
  def onBusMessage(self, bus, message):
    """Bus watch: stop the loop when the pipeline itself reports EOS."""
    if message.src == self._pipeline and message.type == gst.MESSAGE_EOS:
      self.quit()
    return True
  def quit(self):
    """Tear the pipeline down and leave the main loop."""
    self._pipeline.set_state(gst.STATE_NULL)
    self._pipeline.get_state()
    self.mainloop.quit()
  def _on_new_vpad(self, element, pad):
    # video timeline produced its pad: feed it into the video encoder
    pad.link(self.vencoder.bin.get_pad("sink"))
  def _on_new_apad(self, element, pad):
    # audio timeline produced its pad: feed it into the audio encoder
    pad.link(self.aencoder.bin.get_pad("sink"))
if __name__ == "__main__":
  # usage: script input_video output_clip offset_msec duration_msec
  import sys
  # preview geometry; height is replaced below by the aspect-correct value
  # that FirstFrame computed from the source
  width = 128
  height = 80
  samplerate = 44100
  channels = 1
  inputFile = sys.argv[1]
  outputFile = sys.argv[2]
  # command line values are milliseconds; gst.MSECOND converts to gst time units
  offset = int(float(sys.argv[3]) * gst.MSECOND)
  duration = int(float(sys.argv[4]) * gst.MSECOND)
  frameName = "%s.jpg" % os.path.splitext(outputFile)[0]
  # FirstFrame (defined earlier in this file) writes a poster frame jpg
  f = FirstFrame(inputFile, frameName, offset, width)
  height = f.height
  clip = ExtractClip(inputFile, outputFile, offset, duration, width, height, samplerate, channels)
  clip.extract()

View File

@ -0,0 +1,121 @@
#!/usr/bin/env python
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import gobject
gobject.threads_init()
import pygst
pygst.require('0.10')
import gst
import Image
import sys, os, shutil, time
from glob import glob
import math
bar_part_time = 60 * 10  # seconds of video per working segment (10 minutes)
bar_height = 16          # height of the timeline strip in pixels
class GstTimeline:
  """Render a video into a timeline strip png: one 1px-wide column of
  scaled-down frame colors per second of video, bar_height pixels tall.
  """
  def __init__(self, timeline_png):
    self.timeline_png = timeline_png  # output png path
    self.bar = None                   # working strip for the current segment
    self.frames = 0                   # frames collected into self.bar so far
    self.length = 0                   # stream duration in gst time units
    self.number_of_frames = 0         # NOTE(review): apparently unused
  def addVideo(self, video_file):
    """Decode video_file frame by frame and accumulate the timeline image."""
    s = ''' filesrc name=input
      ! decodebin name=dbin
      ! queue name =q
      ! ffmpegcolorspace ! video/x-raw-rgb
      ! fakesink name=output signal-handoffs=true
    '''
    self.pipeline = gst.parse_launch(s)
    self.input = self.pipeline.get_by_name('input')
    self.fakesink = self.pipeline.get_by_name('output')
    self.dbin = self.pipeline.get_by_name('dbin')
    self.bus = self.pipeline.get_bus()
    self.input.set_property("location", video_file)
    # preroll so the duration query below can succeed
    self.pipeline.set_state(gst.STATE_PAUSED)
    self.pipeline.get_state()
    #length
    queue = self.pipeline.get_by_name('q')
    pads = queue.sink_pads()
    q = gst.query_new_duration(gst.FORMAT_TIME)
    for pad in pads:
      if pad.get_peer() and pad.get_peer().query(q):
        format, self.length = q.parse_duration()
    seconds = int(math.ceil(float(self.length) / gst.SECOND))
    print "seconds", seconds
    # final image: one pixel column per second of video
    self.timelineImage = Image.new("RGB", (seconds, bar_height))
    self.timelineImagePos = 0
    #extract frames
    ho = self.fakesink.connect ("handoff", self.snapshot_png_handoff_cb)
    self.pipeline.set_state(gst.STATE_PLAYING)
    self.pipeline.get_state()
    while 1:
      # block until the stream ends or errors out
      msg = self.bus.poll(gst.MESSAGE_EOS | gst.MESSAGE_ERROR, gst.SECOND)
      if msg:
        break
    self.fakesink.disconnect(ho)
    self.pipeline.set_state(gst.STATE_NULL)
  def cropAndSave(self):
    """Scale the collected segment strip to 1 px/second and append it to
    the timeline image; resets the per-segment frame counter."""
    if self.frames:
      mini_width = int(self.frames / self.fps)
      mini_width = min(mini_width, bar_part_time)
      #crop if segment is shorter
      c = self.bar.crop((0, 0, self.frames,bar_height))
      #resize to 1 pixel per second
      mini = c.resize((mini_width,bar_height), Image.ANTIALIAS)
      #add to timeline png
      self.timelineImage.paste(mini, (self.timelineImagePos, 0))
      self.timelineImagePos += mini_width
      self.frames = 0
  def close(self):
    """Flush the pending segment and write the timeline png to disk."""
    self.cropAndSave()
    self.timelineImage.save(self.timeline_png)
  def snapshot_png_handoff_cb(self, sink, gst_buffer, pad):
    """fakesink handoff callback: squeeze each frame into a 1px-wide
    column of self.bar; flush a segment every bar_part_time seconds."""
    caps = sink.sink_pads().next().get_negotiated_caps()
    for s in caps:
      input_d = (s['width'], s['height'])
      framerate = s['framerate']
    if not self.bar:
      # first frame: framerate now known, size the segment strip
      self.fps = float(framerate.num) / framerate.denom
      self.width_part = int(self.fps * bar_part_time)
      self.bar = Image.new("RGB", (self.width_part + 1 ,bar_height))
    img = Image.fromstring('RGB',input_d, gst_buffer)
    height = 16
    # collapse the whole frame into a single 1 x height column
    img = img.resize((1, height), Image.ANTIALIAS)
    self.frames += 1
    for i in range(height):
      self.bar.putpixel((self.frames, i), img.getpixel((0,i)))
    if self.frames >= self.width_part and self.frames > 30:
      self.cropAndSave()
def usage():
  """Print command-line usage and exit with status 1."""
  print ""
  print "usage: %s output_base_path video_file" % sys.argv[0]
  print ""
  sys.exit(1)
if __name__ == "__main__":
if len(sys.argv) < 2:
usage()
if not os.path.exists(sys.argv[1]):
print "target does not exist"
sys.exit(1)
g = GstTimeline(sys.argv[1])
g.addVideo(sys.argv[2])
g.close()

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import sys
import re
import os
def srt2dict(srt, encoding = "latin-1"):
  '''Parse an srt string into a dict of the form
  dict(num = dict(start, stop, text))

  Keys are the subtitle numbers as strings; text is decoded from
  *encoding* into unicode.  Blocks with fewer than three lines are
  skipped.
  '''
  subdict = {}
  for block in srt.replace('\r', '').strip().split('\n\n'):
    lines = block.strip().split('\n')
    if len(lines) <= 2:
      # empty block or one without a timing + text line
      continue
    times = lines[1].split(' --> ')
    key = "%s" % int(lines[0])
    subdict[key] = {
      'start': times[0],
      'stop': times[1],
      'text': unicode('\n'.join(lines[2:]), encoding),
    }
  return subdict
def dict2srt(subtitles, encoding = "latin-1"):
  '''Serialize a dict of the form dict(num = dict(start, stop, text))
  back into an srt file body.

  Entries are emitted in numeric key order, separated by \r\n blank
  lines as srt expects; the result is encoded with *encoding*.
  '''
  parts = []
  for num in sorted(int(k) for k in subtitles.keys()):
    entry = subtitles["%s" % num]
    parts.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
      num,
      entry['start'],
      entry['stop'],
      entry['text']))
  return ''.join(parts).strip().encode(encoding)
def time_str2msec(time_string):
  ''' Convert an srt timestamp "HH:MM:SS,mmm" to integer milliseconds.

  The fields are parsed directly instead of round-tripping through
  time.mktime (as before), so the result no longer depends on the local
  timezone/epoch handling and hours are not limited to 0-23.
  '''
  # "HH:MM:SS,mmm" -> strip ",mmm" for the clock part
  hours, minutes, seconds = time_string[:-4].split(':')
  msec = int(time_string[-3:])
  return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + msec
def msec2time_str(msec):
  ''' Convert integer milliseconds to an srt timestamp "HH:MM:SS,mmm".

  The millisecond field is now zero-padded: the old string slicing
  produced ",50" instead of ",050" for values under 100 ms, which is
  invalid srt.  Hours are no longer wrapped at 24 by time.gmtime.
  '''
  msec = int(msec)
  seconds, ms = divmod(msec, 1000)
  hours, rem = divmod(seconds, 3600)
  minutes, seconds = divmod(rem, 60)
  return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, ms)
def shift_time(offset, time_string):
  ''' Return *time_string* shifted by *offset* milliseconds.
  format of time is expected to be 01:50:52,123
  '''
  return msec2time_str(time_str2msec(time_string) + offset)
def shift_subtitles(offset, offset_num, subtitles):
  '''
  Return a new subtitle dict shifted in time and renumbered.

  *offset* milliseconds are added to every start/stop time and
  *offset_num* to every subtitle number.  The input dict and its
  entries are left untouched: the previous version stored the caller's
  entry dicts by reference and then mutated their start/stop in place.
  '''
  sdict = {}
  for k in sorted([int(k) for k in subtitles.keys()]):
    # copy the entry so the caller's dict is not modified
    entry = dict(subtitles["%s" % k])
    entry['start'] = shift_time(offset, entry['start'])
    entry['stop'] = shift_time(offset, entry['stop'])
    sdict["%s" % (k + offset_num)] = entry
  return sdict
if __name__ == '__main__':
  # usage: script subtitles.srt offset_msec
  # reads an srt file, shifts all timestamps by offset_msec and prints
  # the shifted srt to stdout
  srt = open(sys.argv[1]).read()
  srtd = srt2dict(srt)
  offset = int(sys.argv[2])
  srtd = shift_subtitles(offset, 0, srtd)
  srt = dict2srt(srtd)
  print srt

View File

@ -0,0 +1,305 @@
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=4:sts=4:ts=4
#
# pitivi/elements/singledecodebin.py
#
# Copyright (c) 2005, Edward Hervey <bilboed@bilboed.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
"""
Single-stream queue-less decodebin
"""
import gobject
import gst
def is_raw(caps):
    """ returns True if the caps are RAW """
    raw_prefixes = ("video/x-raw", "audio/x-raw",
                    "text/plain", "text/x-pango-markup")
    return caps.to_string().startswith(raw_prefixes)
class SingleDecodeBin(gst.Bin):
    """
    Single-stream queue-less decodebin.

    Autoplugs demuxers/decoders/parsers from the registry, starting at an
    internal typefind, until a pad matching self.caps appears.  That pad is
    exposed as a ghost 'src' pad and every element that did not contribute
    to the chosen stream is removed again.  Input arrives through the ghost
    'sink' pad, or from an internal source element when a valid uri is given.
    """

    __gsttemplates__ = (
        gst.PadTemplate ("sinkpadtemplate",
                         gst.PAD_SINK,
                         gst.PAD_ALWAYS,
                         gst.caps_new_any()),
        gst.PadTemplate ("srcpadtemplate",
                         gst.PAD_SRC,
                         gst.PAD_SOMETIMES,
                         gst.caps_new_any())
        )

    def __init__(self, caps=None, uri=None, *args, **kwargs):
        gst.Bin.__init__(self, *args, **kwargs)
        if not caps:
            caps = gst.caps_new_any()
        self.caps = caps  # caps of the stream we want to expose
        self.typefind = gst.element_factory_make("typefind", "internal-typefind")
        self.add(self.typefind)

        self.uri = uri
        if self.uri and gst.uri_is_valid(self.uri):
            # pull input from the uri instead of exposing a sink pad
            self.urisrc = gst.element_make_from_uri(gst.URI_SRC, uri, "urisrc")
            self.log("created urisrc %s / %r" % (self.urisrc.get_name(),
                                                 self.urisrc))
            self.add(self.urisrc)
            self.urisrc.link(self.typefind)
        else:
            self._sinkpad = gst.GhostPad("sink", self.typefind.get_pad("sink"))
            self._sinkpad.set_active(True)
            self.add_pad(self._sinkpad)

        self.typefind.connect("have_type", self._typefindHaveTypeCb)

        self._srcpad = None       # ghost src pad, set once the stream is found
        self._dynamics = []       # elements whose pads appear dynamically
        self._validelements = []  # added elements
        self._factories = self._getSortedFactoryList()

    ## internal methods

    def _controlDynamicElement(self, element):
        """Track a dynamic element and continue autoplugging when its pads
        appear."""
        self.log("element:%s" % element.get_name())
        self._dynamics.append(element)
        element.connect("pad-added", self._dynamicPadAddedCb)
        element.connect("no-more-pads", self._dynamicNoMorePadsCb)

    def _getSortedFactoryList(self):
        """
        Returns the list of demuxers, decoders and parsers available, sorted
        by rank
        """
        def myfilter(fact):
            # rank < MARGINAL (64) means the factory should not be autoplugged
            if fact.get_rank() < 64 :
                return False
            klass = fact.get_klass()
            if not ("Demuxer" in klass or "Decoder" in klass or "Parse" in klass):
                return False
            return True
        reg = gst.registry_get_default()
        res = [x for x in reg.get_feature_list(gst.ElementFactory) if myfilter(x)]
        res.sort(lambda a, b: int(b.get_rank() - a.get_rank()))
        return res

    def _findCompatibleFactory(self, caps):
        """
        Returns a list of factories (sorted by rank) which can take caps as
        input. Returns empty list if none are compatible
        """
        self.debug("caps:%s" % caps.to_string())
        res = []
        for factory in self._factories:
            for template in factory.get_static_pad_templates():
                if template.direction == gst.PAD_SINK:
                    intersect = caps.intersect(template.static_caps.get())
                    if not intersect.is_empty():
                        res.append(factory)
                        break
        self.debug("returning %r" % res)
        return res

    def _closeLink(self, element):
        """
        Inspects element and tries to connect something on the srcpads.
        If there are dynamic pads, it sets up a signal handler to
        continue autoplugging when they become available.
        """
        to_connect = []
        dynamic = False
        templates = element.get_pad_template_list()
        for template in templates:
            if not template.direction == gst.PAD_SRC:
                continue
            if template.presence == gst.PAD_ALWAYS:
                pad = element.get_pad(template.name_template)
                to_connect.append(pad)
            elif template.presence == gst.PAD_SOMETIMES:
                pad = element.get_pad(template.name_template)
                if pad:
                    to_connect.append(pad)
                else:
                    dynamic = True
            else:
                # bugfix: this used pad.name_template, but `pad` is unbound
                # (or stale from a previous iteration) in this branch
                self.log("Template %s is a request pad, ignoring" % template.name_template)
        if dynamic:
            self.debug("%s is a dynamic element" % element.get_name())
            self._controlDynamicElement(element)
        for pad in to_connect:
            self._closePadLink(element, pad, pad.get_caps())

    def _tryToLink1(self, source, pad, factories):
        """
        Tries to link one of the factories' element to the given pad.
        Returns the element that was successfully linked to the pad.
        """
        self.debug("source:%s, pad:%s , factories:%r" % (source.get_name(),
                                                         pad.get_name(),
                                                         factories))
        result = None
        for factory in factories:
            element = factory.create()
            if not element:
                self.warning("weren't able to create element from %r" % factory)
                continue
            sinkpad = element.get_pad("sink")
            if not sinkpad:
                continue
            self.add(element)
            try:
                pad.link(sinkpad)
            except:
                # linking failed: back the element out again and try the next
                element.set_state(gst.STATE_NULL)
                self.remove(element)
                continue
            self._closeLink(element)
            element.set_state(gst.STATE_PAUSED)
            result = element
            break
        return result

    def _closePadLink(self, element, pad, caps):
        """
        Finds the list of elements that could connect to the pad.
        If the pad has the desired caps, it will create a ghostpad.
        If no compatible elements could be found, the search will stop.
        """
        self.debug("element:%s, pad:%s, caps:%s" % (element.get_name(),
                                                    pad.get_name(),
                                                    caps.to_string()))
        if caps.is_empty():
            self.log("unknown type")
            return
        if caps.is_any():
            self.log("type is not know yet, waiting")
            return
        if caps.intersect(self.caps):
            # This is the desired caps
            if not self._srcpad:
                self._wrapUp(element, pad)
        elif is_raw(caps):
            self.log("We hit a raw caps which isn't the wanted one")
            # FIXME : recursively remove everything until demux/typefind
        else:
            # Find something
            if len(caps) > 1:
                self.log("many possible types, delaying")
                return
            facts = self._findCompatibleFactory(caps)
            if not facts:
                self.log("unknown type")
                return
            self._tryToLink1(element, pad, facts)

    def _wrapUp(self, element, pad):
        """
        Ghost the given pad of element.
        Remove non-used elements.
        """
        if self._srcpad:
            return
        self._markValidElements(element)
        self._removeUnusedElements(self.typefind)
        # bugfix: was `pad.get_name` (the bound method object), not its result
        self.log("ghosting pad %s" % pad.get_name())
        self._srcpad = gst.GhostPad("src", pad)
        self._srcpad.set_active(True)
        self.add_pad(self._srcpad)
        self.post_message(gst.message_new_state_dirty(self))

    def _markValidElements(self, element):
        """
        Mark this element and upstreams as valid
        """
        self.log("element:%s" % element.get_name())
        if element == self.typefind:
            return
        self._validelements.append(element)
        # find upstream element
        pad = list(element.sink_pads())[0]
        parent = pad.get_peer().get_parent()
        self._markValidElements(parent)

    def _removeUnusedElements(self, element):
        """
        Remove unused elements connected to srcpad(s) of element
        """
        self.log("element:%s" % element)
        for pad in element.src_pads():
            if pad.is_linked():
                peer = pad.get_peer().get_parent()
                self._removeUnusedElements(peer)
                if not peer in self._validelements:
                    self.log("removing %s" % peer.get_name())
                    pad.unlink(pad.get_peer())
                    peer.set_state(gst.STATE_NULL)
                    self.remove(peer)

    def _cleanUp(self):
        """Drop the ghost src pad and every autoplugged element so the bin
        can be reused after going back to READY/NULL."""
        self.log("")
        if self._srcpad:
            self.remove_pad(self._srcpad)
        self._srcpad = None
        for element in self._validelements:
            element.set_state(gst.STATE_NULL)
            self.remove(element)
        self._validelements = []

    ## Overrides

    def do_change_state(self, transition):
        self.debug("transition:%r" % transition)
        res = gst.Bin.do_change_state(self, transition)
        if transition in [gst.STATE_CHANGE_PAUSED_TO_READY, gst.STATE_CHANGE_READY_TO_NULL]:
            self._cleanUp()
        return res

    ## Signal callbacks

    def _typefindHaveTypeCb(self, typefind, probability, caps):
        self.debug("probability:%d, caps:%s" % (probability, caps.to_string()))
        self._closePadLink(typefind, typefind.get_pad("src"), caps)

    ## Dynamic element Callbacks

    def _dynamicPadAddedCb(self, element, pad):
        self.log("element:%s, pad:%s" % (element.get_name(), pad.get_name()))
        if not self._srcpad:
            self._closePadLink(element, pad, pad.get_caps())

    def _dynamicNoMorePadsCb(self, element):
        self.log("element:%s" % element.get_name())

gobject.type_register(SingleDecodeBin)

View File

@ -0,0 +1,139 @@
#!/usr/bin/env python
# depends on
# subtitleripper - http://subtitleripper.sourceforge.net
# unrar
# tesseract-ocr - http://tesseract-ocr.googlecode.com
#
import Image
import os
import sys
import shutil
from glob import glob
import warnings
warnings.filterwarnings("ignore", "tempnam")
colors = ('0,255,255,255', '255,0,255,255', '255,255,0,255', '255,255,255,0')
def readFile(fname):
  """Return the entire contents of the file at *fname*.

  The handle is now closed even if the read raises (the previous
  version leaked it on error).
  """
  f = open(fname)
  try:
    data = f.read()
  finally:
    f.close()
  return data
def getColorChangeCount(image_name):
  """Return the maximum number of ink runs found in any pixel column.

  Scans each column of the (grayscale) subtitle image and counts how
  many separate non-background runs end in it; the column with the most
  runs wins.  Images whose top-left pixel is not pure white (255) are
  treated as unusable and yield 0.
  """
  img = Image.open(image_name)
  background = img.getpixel((0,0))
  max_runs = 0
  if background == 255:
    width, height = img.size
    for x in range(width):
      runs = 0
      in_ink = 0
      for y in range(height):
        if img.getpixel((x,y)) == background:
          # back on background: close a run if one was open
          if in_ink:
            runs += 1
          in_ink = 0
        else:
          in_ink += 1
      max_runs = max(runs, max_runs)
  return max_runs
def getBestMask(filename):
  """Pick the vobsub color mask whose rendering of *filename* looks
  cleanest.

  Tries *filename* under every color-combination directory below the
  global workdir and returns dict(output=..., input_file=...) for the
  variant with the fewest ink runs per column (fewest color changes
  means the cleanest text mask).
  """
  candidates = {}
  for color in colors:
    output_dir = os.path.join(workdir, color.replace(',', '-'))
    candidate = os.path.join(output_dir, filename)
    changes = getColorChangeCount(candidate)
    if changes:
      candidates[changes] = dict(
        output=output_dir,
        input_file=candidate,
      )
  return candidates[min(candidates.keys())]
#main
input_base = sys.argv[1]
if input_base.endswith('.'):
input_base = input_base[:-1]
input_base = os.path.abspath(input_base)
workdir = os.tempnam()
os.mkdir(workdir)
os.chdir(workdir)
input_files = glob("%s*" % input_base)
sub_file = "%s.sub" % input_base
rar_file = "%s.rar" % input_base
idx_file = "%s.idx" % input_base
srt_file = "%s.srt" % input_base
working_base = input_base
if sub_file not in input_files and rar_file in input_files:
working_base = os.path.join(workdir, os.path.basename(input_base))
shutil.copy(rar_file, "%s.rar" % working_base)
rar_file = "%s.rar" % working_base
sub_file = "%s.sub" % working_base
shutil.copy(idx_file, "%s.idx" % working_base)
idx_file = "%s.idx" % working_base
cmd="unrar x '%s' > /dev/null 2>&1" % rar_file
os.system(cmd)
subs = glob("*.sub")
if subs:
os.rename(subs[0], sub_file)
else:
print "no sub file found"
#cleanup
shutil.rmtree(workdir)
sys.exit(1)
sub_lang = "en"
language = ''
for l in readFile(idx_file).split('\n'):
if l.startswith('id: %s' % sub_lang):
language = "-t %s" % l.split('index: ')[-1].strip()
for c in colors:
output = os.path.join(workdir, c.replace(',', '-'))
if not os.path.exists(output):
os.makedirs(output)
cmd = "vobsub2pgm %s -c %s '%s' %s/english >/dev/null 2>&1" % (language, c, working_base, output)
os.system(cmd)
best_output = getBestMask("english0010.pgm")
pgms = glob("%s/english*.pgm" % best_output['output'])
for pgm in sorted(pgms):
#input_pgm = getBestMask(os.path.basename(pgm))['input_file']
input_pgm = pgm
subtitle_tif = os.path.join(workdir, 'subtitle.tif')
cmd = "convert %s %s;tesseract %s %s >/dev/null 2>&1" %(input_pgm, subtitle_tif, subtitle_tif, pgm)
os.system(cmd)
#FIXME what about adding ispell here, interactive again
cmd = "ispell %s/english*.txt" % best_output['output']
cmd = "srttool -s -w < %s/english.srtx > '%s'" % (best_output['output'], srt_file)
os.system(cmd)
#correct some common mistaces of tesseract
sed_script = os.path.join(workdir, 'fix_sed_script')
f = open(sed_script, 'w')
f.write('''s/Idn'/ldn'/g
s/Id'v/ld'v/g
s/ldn'\!/ldn't/g
s/\\\/\\l/W/g
s/V\\\l/W/g
s/eII/ell/g
s/></x/g
''')
cmd = """sed -f %s -i '%s'""" % (sed_script, srt_file)
os.system(cmd)
#cleanup
shutil.rmtree(workdir)

84
sample-prod.cfg Normal file
View File

@ -0,0 +1,84 @@
[global]
# This is where all of your settings go for your production environment.
# You'll copy this file over to your production server and provide it
# as a command-line option to your start script.
# Settings that are the same for both development and production
# (such as template engine, encodings, etc.) all go in
# oxdbarchive/config/app.cfg
# DATABASE
# pick the form for your database
# sqlobject.dburi="postgres://username@hostname/databasename"
# sqlobject.dburi="mysql://username:password@hostname:port/databasename"
# sqlobject.dburi="sqlite:///file_name_and_path"
# If you have sqlite, here's a simple default to get you started
# in development
sqlobject.dburi="sqlite://%(current_dir_uri)s/devdata.sqlite"
# if you are using a database or table type without transactions
# (MySQL default, for example), you should turn off transactions
# by prepending notrans_ on the uri
# sqlobject.dburi="notrans_mysql://username:password@hostname:port/databasename"
# for Windows users, sqlite URIs look like:
# sqlobject.dburi="sqlite:///drive_letter:/path/to/file"
# SERVER
server.environment="production"
# Sets the number of threads the server uses
# server.thread_pool = 1
# if this is part of a larger site, you can set the path
# to the TurboGears instance here
# server.webpath=""
# Set to True if you are deploying your App behind a proxy
# e.g. Apache using mod_proxy
# base_url_filter.on = False
# Set to True if your proxy adds the x_forwarded_host header
# base_url_filter.use_x_forwarded_host = True
# If your proxy does not add the x_forwarded_host header, set
# the following to the *public* host url.
# (Note: This will be overridden by the use_x_forwarded_host option
# if it is set to True and the proxy adds the header correctly.
# base_url_filter.base_url = "http://www.example.com"
# Set to True if you'd like to abort execution if a controller gets an
# unexpected parameter. False by default
# tg.strict_parameters = False
# LOGGING
# Logging configuration generally follows the style of the standard
# Python logging module configuration. Note that when specifying
# log format messages, you need to use *() for formatting variables.
# Deployment independent log configuration is in oxdbarchive/config/log.cfg
[logging]
[[handlers]]
[[[access_out]]]
# set the filename as the first argument below
args="('server.log',)"
class='FileHandler'
level='INFO'
formatter='message_only'
[[loggers]]
[[[oxdbarchive]]]
level='ERROR'
qualname='oxdbarchive'
handlers=['error_out']
[[[access]]]
level='INFO'
qualname='turbogears.access'
handlers=['access_out']
propagate=0

62
setup.py Normal file
View File

@ -0,0 +1,62 @@
from setuptools import setup, find_packages
from turbogears.finddata import find_package_data
import os
# release.py defines `version` (and optionally description, author, email,
# url, ...) which execfile pulls into this module's namespace for setup()
execfile(os.path.join("oxdbarchive", "release.py"))

setup(
    name="oxdbarchive",
    version=version,
    # uncomment the following lines if you fill them out in release.py
    #description=description,
    #author=author,
    #author_email=email,
    #url=url,
    #download_url=download_url,
    #license=license,

    install_requires = [
        "TurboGears >= 1.0.2.2",
    ],
    scripts = ["start-oxdbarchive.py"],
    zip_safe=False,
    packages=find_packages(),
    package_data = find_package_data(where='oxdbarchive',
                                     package='oxdbarchive'),
    keywords = [
        # Use keywords if you'll be adding your package to the
        # Python Cheeseshop

        # if this has widgets, uncomment the next line
        # 'turbogears.widgets',

        # if this has a tg-admin command, uncomment the next line
        # 'turbogears.command',

        # if this has identity providers, uncomment the next line
        # 'turbogears.identity.provider',

        # If this is a template plugin, uncomment the next line
        # 'python.templating.engines',

        # If this is a full application, uncomment the next line
        # 'turbogears.app',
    ],
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Framework :: TurboGears',
        # if this is an application that you'll distribute through
        # the Cheeseshop, uncomment the next line
        # 'Framework :: TurboGears :: Applications',

        # if this is a package that includes widgets that you'll distribute
        # through the Cheeseshop, uncomment the next line
        # 'Framework :: TurboGears :: Widgets',
    ],
    test_suite = 'nose.collector',
)

25
start-oxdbarchive.py Executable file
View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
import pkg_resources
pkg_resources.require("TurboGears")
from turbogears import update_config, start_server
import cherrypy
cherrypy.lowercase_api = True
from os.path import *
import sys
# first look on the command line for a desired config file,
# if it's not on the command line, then
# look for setup.py in this directory. If it's not there, this script is
# probably installed
if len(sys.argv) > 1:
    # explicit config file given on the command line
    update_config(configfile=sys.argv[1],
        modulename="oxdbarchive.config")
elif exists(join(dirname(__file__), "setup.py")):
    # running from a source checkout: use the development config
    update_config(configfile="dev.cfg",modulename="oxdbarchive.config")
else:
    # installed: expect a production config
    update_config(configfile="prod.cfg",modulename="oxdbarchive.config")

from oxdbarchive.controllers import Root

# start the CherryPy/TurboGears server with the application root controller
start_server(Root())

5
test.cfg Normal file
View File

@ -0,0 +1,5 @@
# You can place test-specific configuration options here (like test db uri, etc)
#
sqlobject.dburi = "sqlite:///:memory:"