oxdb archive, backend 219

2007-07-10 12:31:08 +00:00 · 2007-07-10 12:31:08 +00:00 · e46666b6d9
commit e46666b6d9
39 changed files with 3265 additions and 0 deletions
--- a/README.txt
+++ b/README.txt
@ -0,0 +1,4 @@
 oxdbarchive
 This is a TurboGears (http://www.turbogears.org) project. It can be
 started by running the start-oxdbarchive.py script.
--- a/dev.cfg
+++ b/dev.cfg
@ -0,0 +1,47 @@
 [global]
 # This is where all of your settings go for your development environment
 # Settings that are the same for both development and production
 # (such as template engine, encodings, etc.) all go in 
 # oxdbarchive/config/app.cfg
 # DATABASE
 sqlobject.dburi="notrans_mysql://root@localhost/oxdbarchive?sqlobject_encoding=utf-8&use_unicode=1&charset=utf8"
 # SERVER
 server.socket_port=8081
 # Enable the debug output at the end of pages.
 # log_debug_info_filter.on = False
 server.environment="development"
 autoreload.package="oxdbarchive"
 # Auto-Reload after code modification
 # autoreload.on = True
 # Set to True if you'd like to abort execution if a controller gets an
 # unexpected parameter. False by default
 tg.strict_parameters = True
 # LOGGING
 # Logging configuration generally follows the style of the standard
 # Python logging module configuration. Note that when specifying
 # log format messages, you need to use *() for formatting variables.
 # Deployment independent log configuration is in oxdbarchive/config/log.cfg
 [logging]
 [[loggers]]
 [[[oxdbarchive]]]
 level='DEBUG'
 qualname='oxdbarchive'
 handlers=['debug_out']
 [[[allinfo]]]
 level='INFO'
 handlers=['debug_out']
 [[[access]]]
 level='INFO'
 qualname='turbogears.access'
 handlers=['access_out']
 propagate=0
--- a/oxdbarchive.egg-info/PKG-INFO
+++ b/oxdbarchive.egg-info/PKG-INFO
@ -0,0 +1,15 @@
 Metadata-Version: 1.0
 Name: oxdbarchive
 Version: 1.0
 Summary: UNKNOWN
 Home-page: UNKNOWN
 Author: UNKNOWN
 Author-email: UNKNOWN
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
 Classifier: Development Status :: 3 - Alpha
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Framework :: TurboGears
--- a/oxdbarchive.egg-info/SOURCES.txt
+++ b/oxdbarchive.egg-info/SOURCES.txt
@ -0,0 +1,21 @@
 README.txt
 setup.py
 start-oxdbarchive.py
 oxdbarchive/__init__.py
 oxdbarchive/controllers.py
 oxdbarchive/json.py
 oxdbarchive/model.py
 oxdbarchive/release.py
 oxdbarchive.egg-info/PKG-INFO
 oxdbarchive.egg-info/SOURCES.txt
 oxdbarchive.egg-info/dependency_links.txt
 oxdbarchive.egg-info/not-zip-safe
 oxdbarchive.egg-info/paster_plugins.txt
 oxdbarchive.egg-info/requires.txt
 oxdbarchive.egg-info/sqlobject.txt
 oxdbarchive.egg-info/top_level.txt
 oxdbarchive/config/__init__.py
 oxdbarchive/templates/__init__.py
 oxdbarchive/tests/__init__.py
 oxdbarchive/tests/test_controllers.py
 oxdbarchive/tests/test_model.py
--- a/oxdbarchive.egg-info/dependency_links.txt
+++ b/oxdbarchive.egg-info/dependency_links.txt
@ -0,0 +1 @@
--- a/oxdbarchive.egg-info/not-zip-safe
+++ b/oxdbarchive.egg-info/not-zip-safe
@ -0,0 +1 @@
--- a/oxdbarchive.egg-info/paster_plugins.txt
+++ b/oxdbarchive.egg-info/paster_plugins.txt
@ -0,0 +1,2 @@
 TurboGears
 PasteScript
--- a/oxdbarchive.egg-info/requires.txt
+++ b/oxdbarchive.egg-info/requires.txt
@ -0,0 +1 @@
 TurboGears >= 1.0.2.2
--- a/oxdbarchive.egg-info/sqlobject.txt
+++ b/oxdbarchive.egg-info/sqlobject.txt
@ -0,0 +1,2 @@
 db_module=oxdbarchive.model
 history_dir=$base/oxdbarchive/sqlobject-history
--- a/oxdbarchive.egg-info/top_level.txt
+++ b/oxdbarchive.egg-info/top_level.txt
@ -0,0 +1 @@
 oxdbarchive
--- a/oxdbarchive/init.py
+++ b/oxdbarchive/init.py
--- a/oxdbarchive/cache.py
+++ b/oxdbarchive/cache.py
@ -0,0 +1,81 @@
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import os
 from os.path import abspath, exists, join, dirname, basename
 import shutil
 from glob import glob
 import Image
 from StringIO import StringIO
 from scrapeit.utils import read_url
 # Root directory of the on-disk cache: a "cache" folder next to this module.
 cache_root = join(dirname(abspath(__file__)), 'cache')
 # Extension (without the dot) of cached frame images; loadClip() swaps it
 # for 'flv' to derive the clip path.
 img_extension = "jpg"
 # Sub-directory of cache_root for frames.
 frame_cache_root = join(cache_root, 'frame')
 # Hard-coded absolute folder; presumably where the down-scaled "mini"
 # movies are stored -- not used inside this module, verify against callers.
 mini_movie_folder = '/mnt/storage/oil/oxdb/mini'
 def loadFile(f_name):
  '''
    Return the complete contents of the file f_name.

    The file is opened in binary mode because the cache holds images and
    flash clips, and it is closed even if reading fails.
  '''
  f = open(f_name, 'rb')
  try:
    return f.read()
  finally:
    f.close()
 def saveFile(f_name, data):
  '''
    Write data to the file f_name, replacing any previous content.

    The file is opened in binary mode so that image data is stored
    unmodified, and it is closed even if writing fails.
  '''
  f = open(f_name, 'wb')
  try:
    f.write(data)
  finally:
    f.close()
 def loadStaticFile(fname):
  '''
    Return the contents of fname, looked up inside the "static" folder
    that lives next to this module.
  '''
  static_root = join(dirname(abspath(__file__)), "static")
  return loadFile(join(static_root, fname))
 def framePath(frameType, movieID, position):
  '''
    Return the cache path of the image of type frameType that belongs to
    movieID at position, creating the containing folder if needed.

    Colons in position are replaced by dots, the result has the form
    <cache_root>/<frameType>/<movieID>/<position>.<img_extension>
  '''
  position = position.replace(':', '.')
  # imgName() only exists in oxdb_cache and is not imported here, so the
  # "<name>.<extension>" file name is built directly
  file_name = "%s.%s" % (join(movieID, position), img_extension)
  frame = join(cache_root, frameType, file_name)
  frame_dir = dirname(frame)
  if not exists(frame_dir):
    os.makedirs(frame_dir)
  return frame
 def loadDefaultFrame(afile):
  '''
    Return the PNG data of the placeholder frame for afile.

    The placeholder is static/images/stillDark.png, cropped vertically
    around its center to afile.sceneHeight.  It is written to the frame
    cache of the movie as "default.png" on first use and read from there
    afterwards.
  '''
  # framePath() joins its second argument into a path, so it needs the
  # movie id string rather than the file object itself
  frame = framePath('frame', afile.oxdb, 'default')
  # swap only the trailing extension; replacing every 'jpg' would also
  # rewrite a 'jpg' that happens to occur elsewhere in the path
  frame = frame[:-len(img_extension)] + 'png'
  if not exists(frame):
    sourceImage = Image.open(StringIO(loadStaticFile('images/stillDark.png')))
    sourceWidth, sourceHeight = sourceImage.size
    # floor division keeps the crop box on whole pixels
    top = (sourceHeight - afile.sceneHeight) // 2
    targetImage = sourceImage.crop((0, top, sourceWidth, top + afile.sceneHeight))
    targetImage.save(frame, 'PNG')
  return loadFile(frame)
 def loadFrame(afile, position):
  '''
    Return the image data of the frame of afile at position.

    A frame missing from the cache is extracted via afile.extractFrame();
    if it is still missing afterwards the default frame is returned.
  '''
  name = basename(position)
  cached = framePath('frame', afile.oxdb, name)
  if not exists(cached):
    afile.extractFrame(name)
    if not exists(cached):
      return loadDefaultFrame(afile)
  return loadFile(cached)
 def loadClip(afile, position):
  '''
    Return the flash clip data of afile starting at position.

    A clip missing from the cache is extracted via afile.extractClip();
    an empty string is returned if it is still missing afterwards.
  '''
  name = basename(position)
  image_path = framePath('frame', afile.oxdb, name)
  flash = image_path.replace(img_extension, 'flv')
  if not exists(flash):
    afile.extractClip(name)
    if not exists(flash):
      return ''
  return loadFile(flash)
 def loadTimeline(afile):
  '''
    Return the data of the timeline image of afile.

    A timeline missing on disk is extracted via afile.extractTimeline();
    an empty string is returned if it is still missing afterwards.
  '''
  path = afile.timelineFile
  if not exists(path):
    afile.extractTimeline()
    if not exists(path):
      return ''
  return loadFile(path)
--- a/oxdbarchive/config/init.py
+++ b/oxdbarchive/config/init.py
--- a/oxdbarchive/config/app.cfg
+++ b/oxdbarchive/config/app.cfg
@ -0,0 +1,51 @@
 [global]
 # The settings in this file should not vary depending on the deployment
 # environment. dev.cfg and prod.cfg are the locations for
 # the different deployment settings. Settings in this file will
 # be overridden by settings in those other files.
 # The commented out values below are the defaults
 # VIEW
 # which view (template engine) to use if one is not specified in the
 # template name
 # tg.defaultview = "kid"
 # The following kid settings determine the settings used by the kid serializer.
 # One of (html|html-strict|xhtml|xhtml-strict|xml|json)
 # kid.outputformat="html"
 # kid.encoding="utf-8"
 # The sitetemplate is used for overall styling of a site that
 # includes multiple TurboGears applications
 # tg.sitetemplate="<packagename.templates.templatename>"
 # Allow every exposed function to be called as json.
 # tg.allow_json = False
 # List of Widgets to include on every page.
 # for example ['turbogears.mochikit']
 # tg.include_widgets = []
 # Set to True if the scheduler should be started
 # tg.scheduler = False
 # Set session or cookie
 # session_filter.on = True
 # compress the data sent to the web browser
 # [/] 
 # gzip_filter.on = True
 # gzip_filter.mime_types = ["application/x-javascript", "text/javascript", "text/html", "text/css", "text/plain"]
 [/static]
 static_filter.on = True
 static_filter.dir = "%(top_level_dir)s/static"
 [/favicon.ico]
 static_filter.on = True
 static_filter.file = "%(top_level_dir)s/static/images/favicon.ico"
--- a/oxdbarchive/config/log.cfg
+++ b/oxdbarchive/config/log.cfg
@ -0,0 +1,29 @@
 # LOGGING
 # Logging is often deployment specific, but some handlers and
 # formatters can be defined here.
 [logging]
 [[formatters]]
 [[[message_only]]]
 format='*(message)s'
 [[[full_content]]]
 format='*(asctime)s *(name)s *(levelname)s *(message)s'
 [[handlers]]
 [[[debug_out]]]
 class='StreamHandler'
 level='DEBUG'
 args='(sys.stdout,)'
 formatter='full_content'
 [[[access_out]]]
 class='StreamHandler'
 level='INFO'
 args='(sys.stdout,)'
 formatter='message_only'
 [[[error_out]]]
 class='StreamHandler'
 level='ERROR'
 args='(sys.stdout,)'
--- a/oxdbarchive/controllers.py
+++ b/oxdbarchive/controllers.py
@ -0,0 +1,39 @@
 from turbogears import controllers, expose, flash, redirect
 from model import *
 import cherrypy
 # import logging
 # log = logging.getLogger("oxdbarchive.controllers")
 '''
 /md5/frame/position.jpg
 /md5/clip/position.jpg
 /md5/metadata
 '''
 def httpExpires(sec):
  '''
    Return the value for an Expires header lying sec seconds in the
    future, formatted by cherrypy's HTTPDate.
  '''
  import time
  # time.time() already counts from the UTC epoch.  Going through
  # time.mktime(time.gmtime()) would interpret the UTC tuple as local time
  # and shift the result by the local UTC offset.
  expires = time.gmtime(time.time() + sec)
  return cherrypy.lib.httptools.HTTPDate(expires)
 class Root(controllers.RootController):
  '''
    Serves metadata and cached media of archive files:

      /md5/metadata
      /md5/timeline
      /md5/frame/position.jpg
      /md5/clip/position.jpg

    Every other request is redirected to http://0xdb.oil21.org/
  '''
  @expose()
  def default(self, md5Hash, action, position = None):
    '''
      Handle /md5Hash/action[/position].

      action is one of metadata, timeline, frame or clip; frame and clip
      need a position, whose '.png'/'.jpg' suffix is dropped and whose
      '-' and '.' separators are turned into ':'.
    '''
    f = ArchiveFile.byMd5sum(md5Hash)
    if action == 'metadata':
      return dict(meta = f)
    # content type of the data returned by each media action
    contentTypes = {
      'timeline': 'image/png',
      'clip': 'video/x-flv',
      'frame': 'image/jpeg',
    }
    if action in contentTypes and (position or action == 'timeline'):
      cherrypy.response.headerMap['Content-Type'] = contentTypes[action]
      cherrypy.response.headerMap["Expires"] = httpExpires(60*60*24*15)
      if action == 'timeline':
        # a timeline has no position, so position may be None here and
        # must not be touched
        return f.timeline()
      position = position.replace('.png', '').replace('.jpg', '')
      position = position.replace('-', ':').replace('.',':')
      if action == 'clip':
        return f.clip(position)
      return f.frame(position)
    redirect('http://0xdb.oil21.org/')
  @expose()
  def index(self):
    '''
      The archive has no index page, send visitors to the 0xdb site.
    '''
    redirect('http://0xdb.oil21.org/')
--- a/oxdbarchive/cron.py
+++ b/oxdbarchive/cron.py
@ -0,0 +1,13 @@
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 from model import *
 def findeNew():
  '''
    Run importFiles() on the archive with id 1.
  '''
  Archive.get(1).importFiles()
 def extractNew():
  '''
    Run extractAll() on every ArchiveFile whose extracted flag is False.
  '''
  pending = ArchiveFile.select(ArchiveFile.q.extracted == False)
  for archiveFile in pending:
    archiveFile.extractAll()
--- a/oxdbarchive/json.py
+++ b/oxdbarchive/json.py
@ -0,0 +1,26 @@
 # A JSON-based API(view) for your app.
 # Most rules would look like:
 #
 # @jsonify.when("isinstance(obj, YourClass)")
 # def jsonify_yourclass(obj):
 #     return [obj.val1, obj.val2]
 #
 # @jsonify can convert your objects to following types:
 # lists, dicts, numbers and strings
 import datetime
 from turbojson.jsonify import jsonify
 from turbojson.jsonify import jsonify_sqlobject
 from oxdbarchive.model import ArchiveFile
@jsonify.when('isinstance(obj, ArchiveFile)')
def jsonify_ArchiveFile(obj):
  '''
    Convert an ArchiveFile to a dict via jsonify_sqlobject and strip the
    database-internal keys id, archiveID and subtitle_meta_id.
  '''
  result = jsonify_sqlobject( obj )
  #FIXME, possibly do something with the date values
  # date, date_added, modDate, pubDate
  for key in ('id', 'archiveID', 'subtitle_meta_id'):
    # a key that is already absent is no reason to fail the request
    result.pop(key, None)
  return result
@jsonify.when('isinstance(obj, datetime.datetime)')
def jsonify_datetime(obj):
  '''
    Represent a datetime by its str() form.
  '''
  text = str(obj)
  return text
--- a/oxdbarchive/midentify.py
+++ b/oxdbarchive/midentify.py
@ -0,0 +1,96 @@
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import os
 import sys
 # Lower-cased audio codec ids (as read by identify) and the names they
 # are replaced with.
 _audio_codec_map = dict(ffmp3 = 'mp3')
 # Lower-cased video format ids (as read by identify) and the names they
 # are replaced with.
 _video_codec_map = dict(
  xvid = 'XviD',
  dx50 = 'DivX',
  dvsd = 'DV',
  divx = 'DivX',
 )
 def oggzinfo(fname):
  '''
    Return a dict describing the Ogg file fname, based on the output of
    "oggzinfo -b -l".

    The dict has the keys length (milliseconds), height, width, fps,
    aspect (-1.0 if the height is unknown), video_bitrate, video_codec,
    audio_bitrate, audio_codec, audio_rate and audio_channels.  Values
    missing from the tool output fall back to 0 (audio_channels: 1); the
    same happens if the tool cannot be started.
  '''
  import subprocess
  # unicode file names are passed on utf-8 encoded
  if not isinstance(fname, str) and hasattr(fname, 'encode'):
    fname = fname.encode('utf-8')
  # the file name is handed over as a separate argument, not pasted into
  # a shell command, so quotes or '$' in it cannot break the call
  try:
    p = subprocess.Popen(['oggzinfo', '-b', '-l', fname], stdout=subprocess.PIPE)
    data = p.communicate()[0]
  except OSError:
    data = ''
  if not isinstance(data, str):
    data = data.decode('utf-8', 'replace')
  raw_dict = {}
  for row in data.strip().split('\n'):
    # "Key: value"; the value may itself contain colons (durations)
    t = row.split(':')
    raw_dict[t[0].strip()] = ":".join(t[1:]).strip()
  oxdb_dict = {}
  # duration is given as [[hours:]minutes:]seconds; a missing entry has to
  # default to a string since it is split below
  duration = 0
  for part in raw_dict.get('Content-Duration', '0').split(':'):
    duration = float(part) + duration * 60
  oxdb_dict['length'] = int(duration * 1000)
  oxdb_dict['height'] = int(raw_dict.get('Video-Height',0))
  oxdb_dict['width'] = int(raw_dict.get('Video-Width',0))
  oxdb_dict['fps'] = float(raw_dict.get('Video-Framerate', '0').replace('fps',''))
  if float(oxdb_dict['height']):
    oxdb_dict['aspect'] = float(oxdb_dict['width']) / float(oxdb_dict['height'])
  else:
    oxdb_dict['aspect'] = -1.0
  # NOTE(review): video and audio bitrate are both taken from the average
  # bitrate of the whole file -- confirm this is intended
  oxdb_dict['video_bitrate'] = int(float(raw_dict.get('Content-Bitrate-Average','0').replace('kbps','')) * 1024)
  oxdb_dict['video_codec'] = 'Theora'
  oxdb_dict['audio_bitrate'] = int(float(raw_dict.get('Content-Bitrate-Average','0').replace('kbps','')) * 1024)
  oxdb_dict['audio_codec'] = 'Vorbis'
  oxdb_dict['audio_rate'] = int(raw_dict.get('Audio-Samplerate', '0').replace('Hz', ''))
  oxdb_dict['audio_channels'] = int(raw_dict.get('Audio-Channels',1))
  return oxdb_dict
 def identify(fname):
  '''
    Return a dict describing the media file fname.

    The dict has the keys length (milliseconds), height, width, fps,
    aspect, video_bitrate, video_codec, audio_bitrate, audio_codec,
    audio_rate and audio_channels.

    Subtitle files (sub, srt, idx) yield an all-zero dict, ogg/ogv files
    are handled by oggzinfo(), everything else is read from the ID_* lines
    printed by the "midentify" tool.  Values the tool does not report fall
    back to 0 / 'unknown'; aspect is -1.0 if it can neither be read nor
    computed from width and height.
  '''
  import subprocess
  if fname.endswith('sub') or fname.endswith('srt') or fname.endswith('idx'):
    return dict(
      length=0, height=0, width=0, fps=0, 
      video_bitrate=0, audio_bitrate=0, audio_rate=0, audio_channels=0,
      audio_codec='', video_codec='', aspect=-1
    )
  if fname.endswith('ogg') or fname.endswith('ogv'):
    return oggzinfo(fname)
  # unicode file names are passed on utf-8 encoded
  if not isinstance(fname, str) and hasattr(fname, 'encode'):
    fname = fname.encode('utf-8')
  # the file name is handed over as a separate argument, not pasted into
  # a shell command, so quotes or '$' in it cannot break the call
  try:
    p = subprocess.Popen(['midentify', fname], stdout=subprocess.PIPE)
    data = p.communicate()[0]
  except OSError:
    data = ''
  if not isinstance(data, str):
    data = data.decode('utf-8', 'replace')
  raw_dict = {}
  for row in data.strip().split('\n'):
    if '=' not in row:
      continue
    # split at the first '=' only, values may contain further ones
    key, value = row.split('=', 1)
    raw_dict[key] = value.strip()
  oxdb_dict = {}
  oxdb_dict['length'] = int(float(raw_dict.get('ID_LENGTH',-1)) * 1000)
  oxdb_dict['height'] = int(raw_dict.get('ID_VIDEO_HEIGHT',0))
  oxdb_dict['width'] = int(raw_dict.get('ID_VIDEO_WIDTH',0))
  oxdb_dict['fps'] = float(raw_dict.get('ID_VIDEO_FPS',0))
  oxdb_dict['aspect'] = float(raw_dict.get('ID_VIDEO_ASPECT',0))
  # only replace the aspect if none was reported; a reported value must
  # not be overwritten with -1.0
  if not oxdb_dict['aspect']:
    if oxdb_dict['height']:
      oxdb_dict['aspect'] = float(oxdb_dict['width']) / float(oxdb_dict['height'])
    else:
      oxdb_dict['aspect'] = -1.0
  oxdb_dict['video_bitrate'] = int(raw_dict.get('ID_VIDEO_BITRATE',0))
  oxdb_dict['video_codec'] = raw_dict.get('ID_VIDEO_FORMAT','unknown').lower()
  oxdb_dict['audio_bitrate'] = int(raw_dict.get('ID_AUDIO_BITRATE',0))
  oxdb_dict['audio_codec'] = raw_dict.get('ID_AUDIO_CODEC','unknown').lower()
  oxdb_dict['audio_rate'] = int(raw_dict.get('ID_AUDIO_RATE',0))
  oxdb_dict['audio_channels'] = int(raw_dict.get('ID_AUDIO_NCH',1))
  oxdb_dict['audio_codec'] = _audio_codec_map.get(oxdb_dict['audio_codec'], oxdb_dict['audio_codec'])
  oxdb_dict['video_codec'] = _video_codec_map.get(oxdb_dict['video_codec'], oxdb_dict['video_codec'])
  if oxdb_dict['length'] < 0: oxdb_dict['length'] = 0
  return oxdb_dict
--- a/oxdbarchive/model.py
+++ b/oxdbarchive/model.py
@ -0,0 +1,499 @@
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 from sqlobject import *
 from turbogears.database import PackageHub
 import turbogears
 import re
 from urllib import quote, quote_plus
 import os
 from os.path import abspath, join, dirname
 from datetime import datetime
 import time
 import math
 from glob import glob
 import shutil
 import oxdb_cache
 import cache
 import oxdb_import
 from oxdb_utils import oxdb_title, oxdb_director, oxdb_id
 from subtitles import *
 import midentify
 # Database connection hub of the 'oxdbarchive' package.  SQLObject uses a
 # module-level __connection__ as the default connection of the classes
 # defined in the module.
 hub = PackageHub('oxdbarchive')
 __connection__ = hub
 class Archive(SQLObject):
  '''
    A named set of files located below basePath.

    Files are kept as ArchiveFile rows whose path is stored relative to
    basePath.
  '''
  name = UnicodeCol(length=255, alternateID=True)
  basePath = UnicodeCol()
  def _get_basePath(self):
    '''
      basePath, always ending in a slash; a stored value without the
      trailing slash is corrected in the database when it is read.
    '''
    basePath = self._SO_get_basePath()
    if not basePath.endswith('/'):
      basePath = basePath + "/"
      self.basePath = basePath
    return basePath
  def _get_files(self):
    '''
      list of all ArchiveFile objects of this archive
    '''
    return list(ArchiveFile.select(ArchiveFile.q.archiveID == self.id))
  def _get_file_list(self):
    '''
      dict mapping the path of each file to dict(md5sum, size);
      rows that raise SQLObjectNotFound while being read are destroyed.
    '''
    files = {}
    for f in self.files:
      try:
        files[f.path] = dict(md5sum = f.md5sum, size = f.size)
      except SQLObjectNotFound:
        f.destroySelf()
    return files
  def addLocalFile(self, fname, movie = None): 
    '''
      collect stats and metadata of the local file fname and add it
      via addFile, returns the status string of addFile
    '''
    params = oxdb_import.oxdb_file_stats(fname)
    params = oxdb_import.oxdb_file_metadata(params)
    params['date'] = datetime.fromtimestamp(params['date'])
    return self.addFile(params, movie)
  def addFile(self, params, movie = None):
    '''
      updates or adds new file to database, 
      params is a dict with at least md5sum, path, date but also needs
      audio, video, length, size, bpp for new files

      returns a short status string describing what was done
    '''
    params['path'] = params['path'].replace(self.basePath, '')
    q = ArchiveFile.select(AND(
        ArchiveFile.q.archiveID == self.id,
        ArchiveFile.q.md5sum == params['md5sum'],
      ))
    if q.count() > 0:
      # update existing entry
      f = q[0]
      #FIXME: deal with double files here. right now they are changed
      if f.path != params['path']:
        ret =  "this file is already in the database, first time at:\n\t%s\n\t" % f.path
      else:
        ret = "updated entry"
      for field in ('path', 'date'):
        setattr(f, field, params[field])
    else:
      #just a new md5? happens for srt files quite often
      qq = ArchiveFile.select(AND(
        ArchiveFile.q.archiveID == self.id,
        ArchiveFile.q.path == params['path'],
      ))
      if qq.count() == 1:
        # NOTE(review): the stored md5sum and date are left untouched
        # here although the file content changed -- confirm intended
        f = qq[0]
        ret = "updated entry"
      else:
        # add new file to database
        title = oxdb_title(params['path'])
        director = oxdb_director(params['path'])
        oxdb = oxdb_id(title, director)
        f = ArchiveFile(
          archive = self,
          path = params['path'], 
          date = params['date'],
          oxdb = oxdb,
          md5sum = params['md5sum'],
          audio = params['audio'],
          video = params['video'],
          length = params['length'],
          size = params['size'],
          bpp = params['bpp'],
          date_added = datetime.now(),
          subtitle = params['path'].endswith('.srt'),
        )
        ret = "added entry"
    f.updateMeta()
    return ret
  def removeFile(self, md5sum):
    '''
      remove file based on md5sum from archive
    '''
    q = ArchiveFile.select(AND(
        ArchiveFile.q.archiveID == self.id,
        ArchiveFile.q.md5sum == md5sum,
      ))
    if q.count() == 1:
      for i in q: 
        ArchiveFile.delete(i.id)
      return dict(result="file removed")
    return dict(result="not in archive")
  def importFiles(self):
    '''
      synchronize the database with the files found below basePath:
      files with known path and unchanged size are skipped, all others
      are passed to addFile, entries whose md5sum was not seen on disk
      are removed.

      returns a dict with the counters skipped, added and remove
    '''
    stats = {'skipped': 0, 'added': 0, 'remove':0}
    base = self.basePath
    print(base)
    files = oxdb_import.oxdb_spider(base)
    # file_list is a property (see _get_file_list) and must not be called
    oxdb_files = self.file_list
    md5sum_on_disk = []
    for f in files:
      meta = oxdb_import.oxdb_file_stats(f)
      # paths in the database are stored without the base path
      f = f.replace(base, '')
      if f in oxdb_files and oxdb_files[f]['size'] == meta['size']:
        stats['skipped'] += 1
        md5sum_on_disk.append(oxdb_files[f]['md5sum'])
      else:
        meta = oxdb_import.oxdb_file_metadata(meta)
        meta['path'] = f.encode('utf-8')
        #TODO: ignore files changed in the last 5 minutes
        print("%s %s" % (self.addFile(meta), f))
        stats['added'] += 1
        md5sum_on_disk.append(meta['md5sum'])
    for f in oxdb_files:
      md5sum = oxdb_files[f]['md5sum']
      if md5sum not in md5sum_on_disk:
        print("remove %s" % f)
        # removeFile expects the md5sum itself, not a dict
        self.removeFile(md5sum)
        stats['remove'] += 1
    print(stats)
    return stats
 class ArchiveFile(SQLObject):
  '''
  A single movie or subtitle file that belongs to an Archive.

  Besides the stored columns the class provides derived attributes
  (part, offset, title, director, absolutePath, ...) and methods that
  extract frames, flash clips, a mini movie and a timeline image.

  Schema changes noted by the author:
  ALTER TABLE file_meta CHANGE size size bigint;
  ALTER TABLE file_meta CHANGE pixels pixels bigint;
  ALTER TABLE file_meta CHANGE srt srt LONGTEXT;
  '''
  md5sum = UnicodeCol(length=128, alternateID=True)
  oxdb = UnicodeCol(length=128)
  path = UnicodeCol()
  date = DateTimeCol()
  archive = ForeignKey('Archive')
  audio = UnicodeCol()
  video = UnicodeCol()
  length = IntCol()
  size = IntCol()
  bpp = IntCol(default = 0)
  pixels = IntCol(default = 0)
  date_added = DateTimeCol(default = datetime.now)
  pubDate = DateTimeCol(default = datetime.now)
  modDate = DateTimeCol(default = datetime.now)
  height = IntCol(default = -1)
  width = IntCol(default = -1)
  frameAspect = UnicodeCol(default = "1.6", length = 100)
  bitrate = IntCol(default = -1)
  fps = IntCol(default = -1)
  srt = UnicodeCol(default = '')
  subtitle_meta_id = IntCol(default = -1)
  subtitle = BoolCol(default = False)
  part = IntCol(default = 1)
  broken = BoolCol(default = False)
  extracted = BoolCol(default = False)
  filename = UnicodeCol(default = '')
  def _get_part(self):
    '''
      part number taken from the last "Part N" (or, failing that, "CD N")
      in the path, 1 if there is none; the value is also written back to
      the part column.
    '''
    part = 1
    parts = re.compile(r'Part (\d)').findall(self.path)
    if not parts:
      parts = re.compile(r'CD (\d)').findall(self.path)
    if parts:
      part = int(parts[-1])
    self.part = part
    return part
  def _get_offset(self):
    '''
      summed length of all non-subtitle files with the same oxdb id and
      a lower part number, 0 for the first part
    '''
    if not self.part:
      self.part = 1
    if self.part == 1:
      return 0
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part < self.part,
      ArchiveFile.q.subtitle == False,
      ))
    return sum([f.length for f in q])
  def _get_ext(self):
    '''
      text after the last dot of the path
    '''
    return self.path.split('.')[-1]
  def _get_preferredVersion(self):
    '''
      True if nameExtra marks a directors cut or long version
    '''
    e = self.nameExtra.lower()
    for pref in ('directors cut', 'long version'):
      if pref in e:
        return True
    return False
  def _get_nameExtra(self):
    '''
      dot separated parts of the file name that remain once the title,
      the extension and the parts starting with Part, Season, Episode
      or vo are removed
    '''
    ignored = ('Part', 'Season', 'Episode', 'vo')
    def keep(part):
      for prefix in ignored:
        if part.startswith(prefix):
          return False
      return True
    path = os.path.basename(self.path)
    parts = path.replace(self.title, '').split('.')[:-1]
    extra = '.'.join(filter(keep, parts))
    if extra.startswith('.'):
      extra = extra[1:]
    return extra
  def _get_title(self):
    return oxdb_title(self.path)
  def _get_director(self):
    return oxdb_director(self.path)
  def _get_absolutePath(self):
    '''
      path of the file including the base path of its archive
    '''
    return join(self.archive.basePath, self.path)
  def updateMeta(self):
    '''
      refresh the subtitle link and, if the file exists on disk, the
      technical metadata, the subtitle text and the oxdb id
    '''
    self.findSubtitleLink()
    if os.path.exists(self.absolutePath):
      info = midentify.identify(self.absolutePath)
      self.length = info['length']
      self.width = info['width']
      self.frameAspect = "%0.6f" % info['aspect']
      self.height = info['height']      
      self.bitrate = info['video_bitrate']
      self.fps = info['fps']
      self.audio = info['audio_codec']
      self.video = info['video_codec']
      self.updatePixels()
      self.updateBpp()
      self.loadSubtitleFromFile()
      self.oxdb = oxdb_id(self.title, self.director)
  def _get_frameAspect(self):
    '''
      aspect ratio as a number: 1 for subtitles; a stored value of -1 is
      replaced by width / height (16:10 without a height) and saved
    '''
    aspect =  float(self._SO_get_frameAspect())
    if self.subtitle:
      return 1
    if aspect == -1:
      if self.height:
        aspect = float(self.width) / self.height
      else:
        aspect = 16.0 / 10
      self.frameAspect = "%0.6f" % aspect
    return aspect
  def _get_sceneHeight(self):
    '''
      height of a 128 pixel wide frame, rounded up to an even number;
      80 for subtitles
    '''
    default = 80
    if not self.subtitle:
      h = int(128 / self.frameAspect)
      h = h + h % 2
      return h
    return default
  def _get_movieFile(self):
    '''
      for a subtitle: the linked movie file, or None; a link to a file
      that no longer exists is reset
    '''
    if self.subtitle and self.subtitle_meta_id>0:
      try:
        m = ArchiveFile.get(self.subtitle_meta_id)
      except SQLObjectNotFound:
        m = None
        self.subtitle_meta_id = -1
        self.srt = ''
      return m
    return None
  def _get_subtitleFile(self):
    '''
      for a movie: the linked subtitle file, or None; a link to a file
      that no longer exists is reset
    '''
    if not self.subtitle and self.subtitle_meta_id>0:
      try:
        s = ArchiveFile.get(self.subtitle_meta_id)
      except SQLObjectNotFound:
        s = None
        self.subtitle_meta_id = -1
        self.srt = ''
      return s
    return None
  def findSubtitleLink(self):
    '''
      link this file to its counterpart (subtitle for a movie, movie for
      a subtitle): same oxdb id and part, not a .sub file, and either the
      same nameExtra or nameExtra 'en'; -1 if there is none
    '''
    subtitle = not self.subtitle
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part == self.part,
      ArchiveFile.q.subtitle == subtitle,
      ))
    self.subtitle_meta_id = -1
    for f in q:
      if f.path.endswith('.sub'):
        continue
      if f.nameExtra == self.nameExtra or f.nameExtra == 'en':
        self.subtitle_meta_id = f.id
  def _get_mini_movie_file(self):
    return join(oxdb_cache.mini_movie_folder, self.md5sum[:4], "%s.avi" % self.md5sum)
  def removeMiniMovie(self): 
    if os.path.exists(self.mini_movie_file):
      os.remove(self.mini_movie_file)
  def _findSubtitleByStart(self, start):
    '''
      subtitle entry of self.srt that starts at start, None if there is
      no such entry
    '''
    if self.srt:
      d = srt2dict(self.srt)
      for s in d.values():
        if s['start'] == start:
          return s
    return None
  def extractAll(self, force = False):
    '''
      update the metadata and extract mini movie and timeline; with
      force existing extraction results are replaced
    '''
    # NOTE(review): nothing sets self.extracted, so files selected by
    # "extracted == False" are selected again on every run -- confirm
    self.updateMeta()
    self.extractClipMovie(force)
    self.extractTimeline(force)
  def extractClip(self, inpoint, outpoint=-1, flash_folder=oxdb_cache.frame_cache_root): 
    '''
      extract a 128 pixel wide flash clip from the mini movie to
      <flash_folder>/<oxdb>/<inpoint>.flv

      without an outpoint the clip ends where the subtitle starting at
      inpoint stops, or after shift_time(2000, inpoint) if there is none
    '''
    movie_file = self.mini_movie_file
    flash_folder = join(flash_folder, self.oxdb)
    flash_movie = join(flash_folder, "%s.flv" % inpoint.replace(':', '.'))
    if not os.path.exists(flash_folder):
      os.makedirs(flash_folder)
    width = 128
    if self.width > 0 and self.height > 0:
      # float arithmetic: dividing the two ints first would truncate the
      # aspect ratio (640/480 -> 1) or even yield 0
      height = int(width * self.height / float(self.width))
    else:
      height = int(width / self.frameAspect)
    height = height - height % 2
    if outpoint == -1:
      s = self._findSubtitleByStart(inpoint)
      if s:
        outpoint = s['stop']
      else:
        outpoint = shift_time(2000, inpoint)
    # the mini movie covers only this part, so no offset is applied
    extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
    #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset)
  def extractFrame(self, position, img_folder=oxdb_cache.frame_cache_root): 
    '''
      extract the frame at position from the mini movie into
      <img_folder>/<oxdb>; a subtitle delegates to its movie file
    '''
    if self.movieFile:
      return self.movieFile.extractFrame(position, img_folder)
    movie_file = self.mini_movie_file
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)
    extract_frame(movie_file, position, img_folder, offset = 0, redo = False)
  def extractFrames(self, img_folder=oxdb_cache.frame_cache_root): 
    '''
      run extract_subtitles for the subtitles in self.srt on the original
      movie, writing into <img_folder>/<oxdb>; a subtitle delegates to
      its movie file
    '''
    if self.movieFile:
      return self.movieFile.extractFrames(img_folder)
    movie_file = self.absolutePath
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)    
    extract_subtitles(movie_file, self.srt.encode('utf-8'), img_folder, width=100, offset=self.offset)
  def extractClipMovie(self, force = False): 
    '''
      encode the mini movie (mjpeg video, mp3 audio) with mencoder unless
      it exists already; with force an existing one is replaced.
      nothing is done for broken files or if the source file is missing
    '''
    import subprocess
    if self.broken:
      return
    mini_movie_file = self.mini_movie_file
    movie_file = self.absolutePath
    if not movie_file or not os.path.exists(movie_file):
      return
    if force:
      self.removeMiniMovie()
    if os.path.exists(mini_movie_file):
      print("clip exists, skipping extraction %s" % mini_movie_file)
      return
    if not os.path.exists(dirname(mini_movie_file)):
      os.makedirs(dirname(mini_movie_file))
    # file names are passed as separate arguments instead of being pasted
    # into a shell command, so quotes or '$' in them cannot break the call
    cmd = [
      'mencoder',
      '-ovc', 'lavc', '-lavcopts', 'vcodec=mjpeg',
      '-af', 'volnorm=1', '-oac', 'mp3lame', '-lameopts', 'br=64:mode=3',
      '-af', 'resample=44100',
      '-vf', 'scale', '-zoom', '-xy', '128',
      movie_file,
      '-o', mini_movie_file,
    ]
    cmd = [arg.encode('utf-8') for arg in cmd]
    print(' '.join(cmd))
    devnull = open(os.devnull, 'w')
    try:
      try:
        subprocess.call(cmd, stdout=devnull, stderr=devnull)
      except OSError:
        # mencoder is missing or not executable; like a failed encode
        # this simply leaves no mini movie behind
        print("could not run mencoder")
    finally:
      devnull.close()
  def _get_timelineFile(self):
    return join(oxdb_cache.cache_root, 'timeline', self.md5sum[:4], "%s.png" % self.md5sum)
  def removeTimeline(self):
    if os.path.exists(self.timelineFile):
      os.unlink(self.timelineFile)
  def extractTimeline(self, force = False):
    '''
      create the timeline image from the mini movie with
      tools/extract_timeline.py unless it exists already; with force an
      existing one is replaced.
      nothing is done for broken files, files without a video height or
      if there is no mini movie
    '''
    if self.broken:
      return
    if force:
      self.removeTimeline()
    #return if its not a video
    if self.height <= 0:
      return
    t = self.timelineFile
    #lets only extract the timeline if it does not exist yet
    if os.path.exists(t):
      print("skipping,  %s" % self.path)
      return
    # without a mini movie there is nothing to extract from
    if not os.path.exists(self.mini_movie_file):
      return
    # timelines share one folder per md5 prefix, it may exist already
    if not os.path.exists(os.path.dirname(t)):
      os.makedirs(os.path.dirname(t))
    extractTimelineScript = abspath(join(dirname(__file__), "tools/extract_timeline.py"))
    cmd = 'python "%s" "%s" "%s"' %(extractTimelineScript, t, self.mini_movie_file)
    os.system(cmd)
  def loadSubtitleFromFile(self): 
    '''
      reload the srt text of the movie from its .srt subtitle file; the
      text is cleared if there is no readable .srt subtitle.
      works from either side of a movie / subtitle pair
    '''
    if self.movieFile:
      movieFile = self.movieFile
      subtitle = self
    else:
      movieFile = self
      subtitle = self.subtitleFile
    if movieFile:
      movieFile.srt = ''
    if subtitle and movieFile:
      if not subtitle.absolutePath or not os.path.exists(subtitle.absolutePath):
        return
      if not subtitle.absolutePath.endswith('.srt'):
        print("this is not a subtitle %s" % subtitle.absolutePath)
        return
      movieFile.srt  = loadSrt(subtitle.absolutePath)
  def _set_fps(self, fps):
    # the integer column holds fps * 10000 to keep four decimals
    fps = int(fps * 10000)
    self._SO_set_fps(fps)
  def _get_fps(self):
    fps = self._SO_get_fps()
    if fps:
      return float(fps) / 10000
    return 0.0
  def _get_resolution(self):
    '''
      "<width>x<height>", empty for subtitles or unknown dimensions
    '''
    if self.subtitle or (not self.width or not self.height):
      return u''
    return "%sx%s" % (self.width, self.height)
  def updateBpp(self):
    '''
      store and return height * width * fps / bitrate; returns 0 without
      storing if one of the values is unknown
    '''
    # NOTE(review): this is pixels per bit rather than bits per pixel --
    # confirm which one consumers of bpp expect
    if self.height and self.width and self.fps and self.bitrate:
      bpp = int(self.height * self.width * self.fps / self.bitrate)
      self.bpp = bpp
    else:
      bpp = 0
    return bpp
  def updatePixels(self):
    '''
      store and return the number of pixels of the whole movie (seconds *
      fps * width * height); returns 0 without storing if one of the
      values is unknown
    '''
    if self.length and self.fps and self.width and self.height:
      # length is in milliseconds; divide as float so that fractions of
      # a second are not cut off
      pixels = int((self.length / 1000.0) * self.fps * self.width * self.height)
      self.pixels = pixels
    else:
      pixels = 0
    return pixels
  def _get_pixels(self):
    pixels = self._SO_get_pixels()
    if not pixels:
      pixels = self.updatePixels()
    return pixels
  def clip(self, position):
    return cache.loadClip(self, position)
  def frame(self, position):
    return cache.loadFrame(self, position)
  def timeline(self):
    return cache.loadTimeline(self)
--- a/oxdbarchive/oxdb_cache.py
+++ b/oxdbarchive/oxdb_cache.py
@ -0,0 +1,273 @@
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import os
 from os.path import abspath, exists, join, dirname, basename
 import shutil
 from glob import glob
 import Image
 from StringIO import StringIO
 from scrapeit.utils import read_url
 # root of the on-disk cache: a "cache" folder next to this module
 cache_root = join(dirname(abspath(__file__)), 'cache')
 # extension (without dot) used when naming cached images
 img_extension = "jpg"
 # folder for cached frames below cache_root
 frame_cache_root = join(cache_root, 'frame')
 # hard-coded absolute path; presumably where the mini movies live on the
 # deployment machine -- TODO confirm
 mini_movie_folder = '/mnt/storage/oil/oxdb/mini'
 def loadFile(f_name):
  """Return the complete contents of the file f_name as a byte string.

  The file is opened in binary mode because this module reads images and
  clips with it, and the handle is closed even when reading fails.
  """
  f = open(f_name, 'rb')
  try:
    return f.read()
  finally:
    f.close()
 def saveFile(f_name, data):
  """Write the byte string data to f_name, replacing any existing content.

  Binary mode keeps image data intact on every platform; the handle is
  closed even when writing fails.
  """
  f = open(f_name, 'wb')
  try:
    f.write(data)
  finally:
    f.close()
 def loadStaticFile(fname):
  """Return the contents of fname from the "static" folder beside this module."""
  static_root = join(dirname(abspath(__file__)), "static")
  return loadFile(join(static_root, fname))
 def imgName(imdb):
  '''
   returns the image file name for the given hash, i.e. the hash (which may
   include a directory level) followed by a dot and img_extension
  '''
  name = "%s.%s" % (imdb, img_extension)
  return name
 def iconPath(iconType, movie):
  '''
   returns the cache path of the icon of type iconType for the given movie;
   the folder holding the icon is created when it does not exist yet
  '''
  icon = join(cache_root, iconType, imgName(movie.imdb))
  folder = dirname(icon)
  if not exists(folder):
    os.makedirs(folder)
  return icon
 def _writeReflection(sourceFile, targetFile, height = 0.5, opacity = 0.25):
  '''
   render reflection of sourceFile on targetFile,
   uses alpha, target file needs to support RGBA, i.e. png

   height is the fraction of the source height to reflect, opacity the
   alpha (0..1) of the first reflected row; alpha fades to 0 at the bottom.
  '''
  sourceImage = Image.open(sourceFile).convert('RGB')
  targetWidth, sourceHeight = sourceImage.size
  targetHeight = int(round(sourceHeight * height))
  targetImage = Image.new('RGBA', (targetWidth, targetHeight))
  for y in range(0, targetHeight):
    # alpha for this row, fading out linearly towards the bottom
    brightness = int(255 * (targetHeight - y) * opacity / targetHeight)
    for x in range(0, targetWidth):
      # row y of the reflection mirrors row y counted from the source bottom
      targetColor = sourceImage.getpixel((x, sourceHeight - 1 - y))
      targetImage.putpixel((x, y), targetColor + (brightness, ))
  # the save option is called "optimize"; "optimized" was silently ignored
  targetImage.save(targetFile, optimize = True)
 def resizePoster(data, max_resolution, format = 'JPEG'):
  """Scale the image in data to fit max_resolution, keeping its aspect ratio.

  data is the encoded source image, max_resolution a (width, height)
  sequence that is only read (it used to be modified in place, which
  changed the caller's list and failed for tuples). Returns the encoded
  result: JPEG at quality 90 when format is 'JPEG', PNG otherwise.
  """
  maxWidth, maxHeight = max_resolution[0], max_resolution[1]
  sourceImage = Image.open(StringIO(data))
  sourceWidth, sourceHeight = sourceImage.size
  # shrink the bounding box to the aspect ratio of the source
  fitWidth = int(round((float(maxHeight) * sourceWidth) / sourceHeight))
  if fitWidth < maxWidth:
    maxWidth = fitWidth
  fitHeight = int(round((float(maxWidth) / sourceWidth) * sourceHeight))
  if fitHeight < maxHeight:
    maxHeight = fitHeight
  if sourceWidth >= sourceHeight:
    targetWidth = maxWidth
    targetHeight = int(round((float(maxWidth) / sourceWidth) * sourceHeight))
  else:
    targetWidth = int(round((float(maxHeight) * sourceWidth) / sourceHeight))
    targetHeight = maxHeight
  # antialias when shrinking, bicubic when enlarging
  if targetWidth < sourceWidth:
    resizeMethod = Image.ANTIALIAS
  else:
    resizeMethod = Image.BICUBIC
  targetImage = sourceImage.resize((targetWidth, targetHeight), resizeMethod)
  f = StringIO()
  if format == 'JPEG':
    targetImage.save(f, 'JPEG', quality=90)
  else:
    targetImage.save(f, 'PNG')
  return f.getvalue()
 def downloadPoster(movie):
  '''
   reads the poster of movie (from movie.posterFile when set, otherwise from
   the url movie.poster), scales it so that its longer side is 128 pixels,
   saves it as RGB into the poster cache and returns the cached data
  '''
  icon = iconPath('poster', movie)
  if movie.posterFile:
    data = loadFile(movie.posterFile)
  else:
    data = read_url(movie.poster)
  sourceImage = Image.open(StringIO(data))
  sourceWidth, sourceHeight = sourceImage.size
  if sourceWidth >= sourceHeight:
    targetSize = (128, int(round((128.0 / sourceWidth) * sourceHeight)))
  else:
    targetSize = (int(round((128.0 * sourceWidth) / sourceHeight)), 128)
  # antialias when shrinking, bicubic when enlarging
  if targetSize[0] < sourceWidth:
    resizeMethod = Image.ANTIALIAS
  else:
    resizeMethod = Image.BICUBIC
  sourceImage.resize(targetSize, resizeMethod).convert('RGB').save(icon)
  return loadFile(icon)
 def loadPoster(movie):
  '''
   return poster data, taken from the cache when present and fetched via
   downloadPoster otherwise; returns '' for movies without a poster
  '''
  if not movie.hasPoster():
    return ''
  icon = iconPath('poster', movie)
  if not exists(icon):
    return downloadPoster(movie)
  return loadFile(icon)
 def loadPosterReflection(movie):
  '''
   return poster reflection data, renders the reflection if it does not
   exist yet; falls back to a static image when no poster is available
  '''
  icon = iconPath('poster', movie)
  iconReflection = iconPath('posterReflection', movie).replace('jpg', 'png')
  if exists(iconReflection):
    return loadFile(iconReflection)
  if not exists(icon):
    # try to get the poster into the cache first
    loadPoster(movie)
  if not exists(icon):
    return loadStaticFile('images/posterDark.reflection.png')
  _writeReflection(icon, iconReflection)
  return loadFile(iconReflection)
 def framePath(frameType, movie, position):
  '''
   returns the cache path of the frame of type frameType at position for
   the given movie (colons in position become dots); the folder holding
   the frame is created when it does not exist yet
  '''
  frameFile = imgName(join(movie.imdb, position.replace(':', '.')))
  frame = join(cache_root, frameType, frameFile)
  folder = dirname(frame)
  if not exists(folder):
    os.makedirs(folder)
  return frame
 def loadClip(movie, position):
  """Return the cached flv clip at position, extracting it when missing.

  Returns '' when the clip is still missing after movie.extractClip ran.
  """
  position = basename(position)
  flash = framePath('frame', movie, position).replace(img_extension, 'flv')
  if not exists(flash):
    movie.extractClip(position)
    if not exists(flash):
      return ''
  return loadFile(flash)
 def loadFrame(movie, position):
  '''
   returns the cached frame of the given position, extracting it via
   movie.extractFrame when missing; falls back to the default frame
  '''
  position = basename(position)
  frame = framePath('frame', movie, position)
  if not exists(frame):
    movie.extractFrame(position)
    if not exists(frame):
      return loadDefaultFrame(movie)
  return loadFile(frame)
 def loadDefaultFrameReflection(movie):
  """Return the reflection of the movie's default frame, rendering it on demand.

  Falls back to a static image when the default frame cannot be created.
  """
  frame = framePath('frame', movie, 'default').replace('jpg', 'png')
  frameReflection = framePath('frameReflection', movie, 'default').replace('jpg', 'png')
  if exists(frameReflection):
    return loadFile(frameReflection)
  if not exists(frame):
    # creates the default frame as a side effect
    loadDefaultFrame(movie)
  if not exists(frame):
    return loadStaticFile('images/stillDark.reflection.png')
  _writeReflection(frame, frameReflection)
  return loadFile(frameReflection)
 def loadDefaultFrame(movie):
  """Return the default frame of movie as png data, creating it on demand.

  The frame is a horizontal band of height movie.sceneHeight cut from the
  vertical middle of the static stillDark.png image.
  """
  frame = framePath('frame', movie, 'default').replace('jpg', 'png')
  if exists(frame):
    return loadFile(frame)
  still = Image.open(StringIO(loadStaticFile('images/stillDark.png')))
  stillWidth, stillHeight = still.size
  top = (stillHeight - movie.sceneHeight) / 2
  band = still.crop((0, top, stillWidth, top + movie.sceneHeight))
  band.save(frame, 'PNG')
  return loadFile(frame)
 def loadFrameReflection(movie, position):
  '''
   returns png frame reflection of the given position, rendering it on
   demand; falls back to the default frame reflection
  '''
  position = basename(position)
  frame = framePath('frame', movie, position)
  frameReflection = framePath('frameReflection', movie, position).replace('jpg', 'png')
  if exists(frameReflection):
    return loadFile(frameReflection)
  if not exists(frame):
    # extracts the frame into the cache as a side effect
    loadFrame(movie, position)
  if not exists(frame):
    return loadDefaultFrameReflection(movie)
  _writeReflection(frame, frameReflection)
  return loadFile(frameReflection)
 def loadTimeline(movie, position):
  bar = framePath('timeline', movie, position).replace('jpg', 'png')
  if exists(bar):
    return loadFile(bar)
  print bar
  return ''
  #FIXME load and return bar hre
 def moveCache(old_imdb, new_imdb):
  '''
  move the frame, frameReflection and timeline cache folders of old_imdb
  to new_imdb; an old folder whose target already exists is deleted
  '''
  for cacheType in ('frame', 'frameReflection', 'timeline'):
    old = join(cache_root, cacheType, old_imdb)
    new = join(cache_root, cacheType, new_imdb)
    if exists(old) and not exists(new):
      shutil.move(old, new)
    # still there: the target already existed, drop the old copy
    if exists(old):
      shutil.rmtree(old)
--- a/oxdbarchive/oxdb_import.py
+++ b/oxdbarchive/oxdb_import.py
@ -0,0 +1,174 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 # OXDb Import client, crawls the filesystem and gathers information about 
 # movies
 #
 import md5
 import os
 import sys
 import re
 import urllib
 import time
 import simplejson
 from scrapeit.utils import read_url
 class OXDb:
  """Client side view of the remote OXDb archive backend.

  Every method delegates to self._remote_call(action[, params]) and picks
  one key out of the mapping it returns. NOTE(review): _remote_call is not
  defined in this class as visible here; it has to be supplied (e.g. by a
  subclass) before any of these methods can work.
  """
  def __init__(self, archive):
    self.archive = archive
  def getBase(self):
    """Return the archive base path, always ending in '/'.

    The result is also kept in self.basePath.
    """
    # a stray "self.basePath = self.ar" used to raise AttributeError here
    self.basePath = self._remote_call('base')['base']
    if not self.basePath.endswith('/'):
      self.basePath = self.basePath + '/'
    return self.basePath
  def files(self):
    """Return the 'files' entry of the backend's 'list' call."""
    return self._remote_call('list')['files']
  def addFile(self, params):
    """Send params with the 'add' call and return its 'result' entry."""
    return self._remote_call('add', params)['result']
  def removeFile(self, params):
    """Send params with the 'remove' call and return its 'result' entry."""
    return self._remote_call('remove', params)['result']
 def oxdb_md5sum(fname):
    '''Returns the md5 hex digest of the file fname, or None if the file
       does not exist.'''
    # hashlib replaces the deprecated md5 module; fall back for old Pythons
    try:
        from hashlib import md5 as md5_new
    except ImportError:
        from md5 import new as md5_new
    if not os.path.exists(fname):
        # the original returned an unbound name here and raised NameError
        return None
    m = md5_new()
    f = open(fname, 'rb')
    try:
        # read in chunks so large movie files never sit in memory at once
        while True:
            d = f.read(8192)
            if not d:
                break
            m.update(d)
    finally:
        f.close()
    return m.hexdigest()
 # file extensions (lower case, with leading dot) that oxdb_spider collects;
 # the rows group video, audio, subtitle/archive and image extensions
 _oxdb_extensions = (
  '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb',
  '.mp3', '.wav',
  '.srt', '.sub', '.idx', '.rar',
  '.jpg', '.png',
 )
 def _oxdb_file_blacklist(f):
  if f.startswith('.'):
    return True
  return False
 def oxdb_spider(archive_base):
  """Return the paths of all archive files found below archive_base.

  A file is collected when its name is not blacklisted and its extension
  is listed in _oxdb_extensions. os.walk already descends into every sub
  folder, so there is no manual recursion (which listed nested files
  several times and grew exponentially with the folder depth).
  """
  oxdb_files = []
  for root, dirs, files in os.walk(archive_base):
    for f in files:
      if _oxdb_file_blacklist(f):
        continue
      if os.path.splitext(f)[1] in _oxdb_extensions:
        oxdb_files.append(os.path.join(root, f))
  return oxdb_files
 _known_oxdb_extensions = ['Interview']
 _known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$']
 def _in_known_oxdb_extensions(term):
  '''
    tells whether term is a file name part that is known to not be part of
    the title: either listed literally or matched by one of the patterns
  '''
  if term in _known_oxdb_extensions:
    return True
  for pattern in _known_oxdb_extensions_reg:
    if re.search(pattern, term, re.IGNORECASE):
      return True
  return False
 def oxdb_title(title):
  '''
    normalize filename to get movie title: drops the file extension and
    then trailing dot separated parts that are known non-title parts,
    start with 'Part ' or are 2 or 4 characters long
  '''
  parts = title.split('.')[:-1]
  while len(parts) > 1:
    last = parts[-1]
    removable = _in_known_oxdb_extensions(last) \
        or last.startswith('Part ') \
        or len(last) == 2 \
        or len(last) == 4
    if not removable:
      break
    parts = parts[:-1]
  return ".".join(parts)
 def oxdb_import_files(archive):
  stats = {'skipped': 0, 'added': 0, 'remove':0}
  oxdb_backend = OXDb()
  base = oxdb_backend.getBase()
  print base
  files = oxdb_spider(base)
  oxdb_files = oxdb_backend.files()
  md5sum_on_disk = []
  for f in files:
    meta = oxdb_file_stats(f)
    f = f.replace(base, '')
    if oxdb_files.has_key(f) and oxdb_files[f]['size'] == meta['size']:
      stats['skipped'] += 1
      md5sum_on_disk.append(oxdb_files[f]['md5sum'])
    else:
      meta = oxdb_file_metadata(meta)
      #remove base
      meta['path'] = f.encode('utf-8')
      #ignore files changed in the last 5 minutes
      if time.time() - meta['date'] > 300:
        print oxdb_backend.addFile(meta), f
        stats['added'] += 1
      else:
        print "to hot, skipping for now", f
      md5sum_on_disk.append(meta['md5sum'])
  for f in oxdb_files:
    if oxdb_files[f]['md5sum'] not in md5sum_on_disk:
      print "remove", f
      oxdb_backend.removeFile({'md5sum':oxdb_files[f]['md5sum']})
      stats['remove'] += 1
  print stats
  return stats
 def oxdb_file_stats(fname):
  """Return a dict with the path, size (st_size) and date (st_mtime) of fname."""
  info = os.stat(fname)
  return dict(path=fname, size=info.st_size, date=info.st_mtime)
 def oxdb_file_metadata(meta):
  '''
    extend meta (as returned by oxdb_file_stats) in place with empty
    video/audio info, zero length and bpp and the md5sum of meta['path'];
    returns the same dict
  '''
  meta['video'] = ''
  meta['audio'] = ''
  meta['length'] = 0
  meta['bpp'] = 0
  meta['md5sum'] = oxdb_md5sum(meta['path'])
  #FIXME: use midentifiy or other to get more info about file
  # (the unreachable title/director lookup that followed was removed)
  return meta
 # if invoked on command line, import the files of the archive given as
 # first argument (oxdb_import_files requires it and was called without).
 if __name__ == '__main__':
  if len(sys.argv) < 2:
    sys.stderr.write("usage: %s archive\n" % sys.argv[0])
    sys.exit(1)
  oxdb_import_files(sys.argv[1])
--- a/oxdbarchive/oxdb_utils.py
+++ b/oxdbarchive/oxdb_utils.py
@ -0,0 +1,210 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 # OXDb utilities: helpers to derive titles, directors and ids from file
 # paths and to format numbers, sizes and durations for display
 #
 import md5
 import os
 import sys
 import re
 import urllib
 from scrapeit.utils import read_url
 # presumably file names to ignore when crawling -- not used by any
 # function visible in this part of the file, TODO confirm
 _oxdb_file_blacklist = ['.DS_Store']
 # movie and subtitle file extensions (lower case, with leading dot)
 _oxdb_extensions = [
  '.avi', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4',
  '.srt', '.sub', '.idx'
 ]
 _known_oxdb_extensions = ['Interview']
 _known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$']
 def _in_known_oxdb_extensions(term):
  '''
    tells whether term is a file name part that is known to not be part of
    the title: either listed literally or matched by one of the patterns
  '''
  if term in _known_oxdb_extensions:
    return True
  for pattern in _known_oxdb_extensions_reg:
    if re.search(pattern, term, re.IGNORECASE):
      return True
  return False
 def oxdb_filenameUmlaute(string):
  """Return string as unicode with ae/oe/ue (and Ae/Oe/Ue) turned into umlauts."""
  result = u"%s" % string
  replacements = (
    (u'ae', u'ä'), (u'oe', u'ö'), (u'ue', u'ü'),
    (u'Ae', u'Ä'), (u'Oe', u'Ö'), (u'Ue', u'Ü'),
  )
  for plain, umlaut in replacements:
    result = result.replace(plain, umlaut)
  return result
 def oxdb_director(director):
  """Return the director taken from the name of the folder holding a file.

  director is the path of a file; the name of its parent folder is
  returned with ' & ' turned into ', '. (The result of replace() used to
  be thrown away, so the ampersand was never converted.)
  """
  folder = os.path.basename(os.path.dirname(director))
  return folder.replace(' & ', ', ')
 def oxdb_title(title):
  '''
    normalize filename to get movie title: the base name up to its first
    dot, where dots next to a space ('. ' and ' .') do not count
  '''
  protected = (('. ', '_dot__space_'), (' .', '_space__dot_'))
  name = os.path.basename(title)
  # hide the dots that belong to the title behind placeholders
  for text, placeholder in protected:
    name = name.replace(text, placeholder)
  name = name.split('.')[0]
  for text, placeholder in protected:
    name = name.replace(placeholder, text)
  return name
 def oxdb_id(title, director):
  """Return the oxdb id of a movie: '0x' + md5 hex digest of "director/title".

  The key is encoded as utf-8 before hashing.
  """
  # hashlib replaces the deprecated md5 module; fall back for old Pythons
  try:
    from hashlib import md5 as md5_new
  except ImportError:
    from md5 import new as md5_new
  key = u"%s/%s" % (director, title)
  return '0x%s' % md5_new(key.encode('utf-8')).hexdigest()
 def formatNumber(n, sep=','):
  '''
  separate the digits of n into groups of three with sep (thousand comma),
  e.g. 1234567 -> "1,234,567"; a leading minus sign is kept in front
  '''
  digits = str(n)
  sign = ''
  if digits.startswith('-'):
    sign, digits = '-', digits[1:]
  groups = []
  # peel off groups of three from the right
  while len(digits) > 3:
    groups.insert(0, digits[-3:])
    digits = digits[:-3]
  groups.insert(0, digits)
  return sign + sep.join(groups)
 def oxdb_runtimeformat(runtime):
  '''
  format runtime (in seconds) for stats: '' for 0, seconds below a minute,
  whole minutes below 900 minutes and hours/days/years with all smaller
  units above that
  '''
  if runtime == 0:
    return ''
  if runtime < 60:
    return "%s sec" % runtime
  minutes = int(runtime / 60)
  seconds = runtime % 60
  if minutes < 900:
    return "%s min" % minutes
  hours, minutes = divmod(minutes, 60)
  if hours < 24:
    return "%s hours %s minutes %s seconds" % (hours, minutes, seconds)
  days, hours = divmod(hours, 24)
  if days < 365:
    return "%s days %s hours %s minutes %s seconds" % (days, hours, minutes, seconds)
  years, days = divmod(days, 365)
  return "%s years %s days %s hours %s minutes %s seconds" % (years, days, hours, minutes, seconds)
 def oxdb_lengthformat(mseconds):
  """
    Format mseconds (milliseconds) as "HH:MM:SS", or as "D:HH:MM:SS" once
    the length reaches a full day (exactly 24 hours used to be printed
    as "24:00:00").
  """
  # explicit floor division, independent of the division mode in effect
  seconds = int(mseconds) // 1000
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  if hours >= 24:
    days, hours = divmod(hours, 24)
    return "%d:%02d:%02d:%02d" % (days, hours, minutes, seconds)
  return "%02d:%02d:%02d" % (hours, minutes, seconds)
 """
 Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102
 bytes, etc).
  number - number to format.
  long_name - long name. i.e. byte
  short - short name, i.e. B
 """
 def oxdb_format(number, long_name, short):
  """Return number scaled by powers of 1024 with a K/M/G/T prefix.

  Values below 1024 are printed with long_name (pluralized), larger ones
  with the prefix followed by short; empty values give "0 <long_name>s".
  """
  if not number:
    return "0 %ss" % long_name
  number = float(number)
  if number < 1024:
    return "%d %s%s" % (number, long_name, number != 1 and 's' or '')
  # (template, divisor) for each prefix that still has a bigger one above
  scales = (
    ("%d K%s", 1024),
    ("%.1f M%s", 1024 * 1024),
    ("%.2f G%s", 1024 * 1024 * 1024),
  )
  for template, divisor in scales:
    if number < divisor * 1024:
      return template % (number / divisor, short)
  return "%.3f T%s" % (number / (1024 * 1024 * 1024 * 1024), short)
 def oxdb_filesizeformat(number):
  """Format number as a file size via oxdb_format ('byte' / 'B')."""
  return oxdb_format(number, long_name='byte', short='B')
 def oxdb_bitformat(number):
  """Format number as an amount of bits via oxdb_format ('bit' / 'b')."""
  return oxdb_format(number, long_name='bit', short='b')
 def oxdb_pixelformat(number):
  """Format number as an amount of pixels via oxdb_format ('pixel' / 'px')."""
  return oxdb_format(number, long_name='pixel', short='px')
 from htmlentitydefs import name2codepoint
 # This pattern matches a character entity reference (a decimal numeric
 # references, a hexadecimal numeric reference, or a named reference).
 charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
 def htmldecode(text):
  """Decode HTML entities in the given text.

  Every match of charrefpat is replaced: decimal (&#NN;) and hexadecimal
  (&#xHH;) references by the character with that code, named references
  by their name2codepoint entry. Unknown names are left untouched.
  """
  # unicode input always decodes to unicode characters; byte strings get a
  # byte for codes up to 255 and a unicode character above that
  if type(text) is unicode:
    uchr = unichr
  else:
    uchr = lambda value: value > 255 and unichr(value) or chr(value)
  def entitydecode(match, uchr=uchr):
    # group(1) is the reference without the leading '&' and trailing ';'
    entity = match.group(1)
    if entity.startswith('#x'):
      return uchr(int(entity[2:], 16))
    elif entity.startswith('#'):
      return uchr(int(entity[1:]))
    elif entity in name2codepoint:
      return uchr(name2codepoint[entity])
    else:
      # not a known entity: keep the matched text as it is
      return match.group(0)
  return charrefpat.sub(entitydecode, text)
 def highlight(text, query):
  """Wrap every case-insensitive occurrence of query in text in a
  <span class="textHighlight"> element and return the result.

  text is returned unchanged for an empty query. Substitution happens in a
  single pass; substituting once per match used to wrap earlier results
  again and could match inside the inserted markup itself.
  NOTE(review): text is not HTML-escaped here; callers have to take care.
  """
  if not query:
    return text
  pattern = re.compile("(%s)" % re.escape(query), re.IGNORECASE)
  return pattern.sub('<span class="textHighlight">\\1</span>', text)
 def imdb2oxdb(imdb):
  """Return imdb as an oxdb id: "0x" plus at least 6 upper case hex digits.

  Ids that already start with '0x' or 'ox' are returned unchanged.
  """
  already_oxdb = imdb.startswith('0x') or imdb.startswith('ox')
  if not already_oxdb:
    return "0x%06X" % int(imdb)
  return imdb
 def oxdb2imdb(oxdb):
  """Return the 7 digit imdb id for an 8 character oxdb id.

  Ids of any other length are returned unchanged. 8 character ids of the
  form 0xHHHHHH are read as hexadecimal (float() cannot parse them and
  raised ValueError); other 8 character ids are read as decimal numbers
  as before.
  """
  if len(oxdb) != 8:
    return oxdb
  if oxdb.lower().startswith('0x'):
    return "%07d" % int(oxdb, 16)
  return "%07d" % float(oxdb)
 def trimString(string, length):
  """Shorten strings longer than length to their start, '...' and last 10 characters."""
  if len(string) <= length:
    return string
  head = string[:length - 13]
  tail = string[-10:]
  return head + '...' + tail
 # two-letter codes; presumably the language codes accepted elsewhere in
 # the project (they look like ISO 639-1 codes) -- TODO confirm
 languages = ('be', 'bg', 'ba', 'wo', 'bn', 'bo', 'bh', 'bi', 'ji', 'br', 'ja', 
 'ru', 'rw', 'tl', 'rm', 'rn', 'ro', 'gu', 'jw', 'gd', 'ga', 'sv', 'gn', 'gl',
 'om', 'tn', 'fa', 'oc', 'ss', 'or', 'hy', 'hr', 'sw', 'hu', 'hi', 'su', 'ha', 
 'ps', 'pt', 'sk', 'pa', 'pl', 'el', 'eo', 'en', 'zh', 'sm', 'eu', 'et', 'sa', 
 'es', 'mg', 'uz', 'ml', 'mo', 'mn', 'mi', 'as', 'mk', 'ur', 'mt', 'ms', 'mr',
 'my', 'fr', 'fy', 'ia', 'zu', 'fi', 'fj', 'fo', 'nl', 'no', 'na', 'ne', 'xh',
 'co', 'ca', 'cy', 'cs', 'ka', 'kk', 'sr', 'sq', 'ko', 'kn', 'km', 'kl', 'ks', 
 'si', 'sh', 'so', 'sn', 'ku', 'sl', 'ky', 'sg', 'sd', 'yo', 'de', 'da', 'dz',
 'la', 'ln', 'lo', 'tt', 'tr', 'ts', 'lv', 'to', 'lt', 'tk', 'th', 'ti', 'tg',
 'te', 'ta', 'aa', 'ab', 'uk', 'af', 'vi', 'is', 'am', 'it', 'iw', 'vo', 'ik', 
 'ar', 'in', 'ay', 'az', 'ie', 'qu', 'st', 'tw')
--- a/oxdbarchive/release.py
+++ b/oxdbarchive/release.py
@ -0,0 +1,14 @@
 # Release information about oxdbarchive
 version = "1.0"
 # description = "Your plan to rule the world"
 # long_description = "More description about your plan"
 # author = "Your Name Here"
 # email = "YourEmail@YourDomain"
 # copyright = "Vintage 2006 - a good year indeed"
 # if it's open source, you might want to specify these
 # url = "http://yourcool.site/"
 # download_url = "http://yourcool.site/download"
 # license = "MIT"
--- a/oxdbarchive/subtitles.py
+++ b/oxdbarchive/subtitles.py
@ -0,0 +1,278 @@
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import re
 import os
 from os.path import abspath, join, dirname
 import shutil
 import chardet
 img_extension = 'jpg'
 def srt2txt(srt, encoding = "latin-1"):
  """Return only the text of the subtitles in srt, in numeric order.

  The texts are separated by an empty line; encoding is handed on to
  srt2dict.
  """
  subtitles = srt2dict(srt, encoding)
  numbers = sorted([int(k) for k in subtitles.keys()])
  chunks = ["%s\n\n" % subtitles["%s" % number]['text'] for number in numbers]
  return "".join(chunks).strip()
 def srt2dict(srt, encoding = "latin-1"):
  '''convert srt string into a dict in the form
     dict(num = dict(start, stop, text))

     num is the subtitle number as a string. Blocks with fewer than three
     lines, a non-numeric number line or a timing line without ' --> ' are
     skipped instead of aborting the whole conversion. encoding is not
     used; it is kept so that existing callers keep working.
  '''
  subdict = {}
  srt = srt.replace('\r', '').strip()
  for block in srt.split('\n\n'):
    lines = block.strip().split('\n')
    if len(lines) <= 2:
      continue
    try:
      number = u"%s" % int(lines[0])
    except ValueError:
      continue
    start_stop = lines[1].split(' --> ')
    if len(start_stop) < 2:
      continue
    subdict[number] = {
      'start': start_stop[0],
      'stop': start_stop[1],
      'text': u'\n'.join(lines[2:]),
      }
  return subdict
 def dict2srt(subtitles, encoding = "latin-1"):
  '''convert dict in the form dict(num = dict(start, stop, text))
     into the content of an srt file: entries in numeric order, lines
     ending in CRLF, the whole text stripped and encoded with encoding
  '''
  entries = []
  for number in sorted([int(k) for k in subtitles.keys()]):
    key = "%s" % number
    entry = subtitles[key]
    entries.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
      key, entry['start'], entry['stop'], entry['text']))
  return "".join(entries).strip().encode(encoding)
 def time_str2msec(time_string):
  """Return the milliseconds for a time string like 01:50:52,123.

  The part after the comma is optional and read as a decimal fraction of
  a second. The value is computed with integer arithmetic: going through
  float epoch timestamps lost precision, so int() could cut off one
  millisecond, and hours above 23 were rejected. Raises ValueError for
  strings that are not of the form H:M:S[,fraction].
  """
  parts = time_string.split(',')
  if len(parts) > 1:
    msec = int(round(float("0." + parts[-1]) * 1000))
  else:
    msec = 0
  hours, minutes, seconds = [int(part) for part in parts[0].split(':')]
  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + msec
 def msec2time_str(msec):
  """Return msec (milliseconds) as a time string like 01:50:52,123.

  Milliseconds are always written with three digits (values below 100
  used to lose their leading zeros) and hours are not wrapped at 24.
  Negative values are clamped to 00:00:00,000.
  """
  msec = max(0, int(msec))
  seconds, msec = divmod(msec, 1000)
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, msec)
 def shift_time(offset, time_string):
  ''' return time_string shifted by offset milliseconds
      format of the time is expected to be 01:50:52,123
  '''
  shifted = time_str2msec(time_string) + offset
  return msec2time_str(shifted)
 def shift_subtitles(offset, offset_num, subtitles):
  '''
    returns a copy of the subtitles dict in which every start and stop time
    is shifted by offset milliseconds and every number by offset_num;
    the entries of the dict passed in are left untouched
  '''
  sdict = {}
  for k in sorted([int(k) for k in subtitles.keys()]):
    # copy the entry so that the caller's dict is not changed
    entry = dict(subtitles["%s" % k])
    entry['start'] = shift_time(offset, entry['start'])
    entry['stop'] = shift_time(offset, entry['stop'])
    sdict["%s" % (k + offset_num)] = entry
  return sdict
 def merge_subtitles(subtitles):
  '''
    converts a dict of subtitle parts, each a dict(txt, length), into one
    srt subtitle; parts are taken in key order and each part is shifted by
    the summed length of the parts before it
  '''
  merged = {}
  elapsed = 0
  for name in sorted(subtitles.keys()):
    part = subtitles[name]
    entries = srt2dict(part['txt'])
    if elapsed:
      entries = shift_subtitles(elapsed, len(merged), entries)
    merged.update(entries)
    elapsed += part['length']
  return dict2srt(merged)
 def split_subtitle(subtitles, offset):
  '''
    split subtitles (a dict as returned by srt2dict) at offset milliseconds

    returns a tuple (one, two): one holds the entries that stop before
    offset, two the remaining entries shifted back by offset and
    renumbered to continue at 1.
    NOTE(review): the original could not run (undefined names, no return
    value) and treated offset inconsistently; milliseconds are assumed
    here because shift_subtitles expects them -- confirm with callers.
  '''
  one = {}
  two = {}
  for k in sorted([int(k) for k in subtitles.keys()]):
    key = "%s" % k
    # copies keep the entries of the dict passed in untouched
    if time_str2msec(subtitles[key]['stop']) < offset:
      one[key] = dict(subtitles[key])
    else:
      two[key] = dict(subtitles[key])
  two = shift_subtitles(-offset, -len(one), two)
  return one, two
 def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt'):
    print "this is not a movie file, will not try to extract frames"
    return
  if offset:
    print "Inpoint ", inpoint,
    inpoint = shift_time(-offset, inpoint)
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
  duration = time_str2msec(outpoint) - time_str2msec(inpoint)
  inpoint = time_str2msec(inpoint)
  extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
  cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
  os.system(cmd.encode('utf-8'))
 def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
  import warnings
  warnings.filterwarnings("ignore", "tempnam")
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt', 'mkv'):
    print "this is not a movie file, will not try to extract frames"
    return
  framedir = os.tempnam()
  os.mkdir(framedir)
  os.chdir(framedir)
  if offset:
    print "Inpoint ", inpoint,
    inpoint = shift_time(-offset, inpoint)
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
  audiorate = "44100"
  if os.path.exists(movie_file):
    mencoder_options = ''
    mencoder_options += " '%s'" % movie_file
    mencoder_options += " -ss '%s' -endpos %0.2f" % (inpoint, outpoint)
    mencoder_options += ' -ovc copy -oac copy -o tempfile.avi '
    mencoder = "mencoder %s >/dev/null 2>&1" % mencoder_options
    #print mencoder.encode('utf-8')
    os.system(mencoder.encode('utf-8'))
    ffmpeg_options = ''
    #ffmpeg_options += " -ss '%s' -t %0.2f" % (inpoint, outpoint)
    ffmpeg_options += " -y -i 'tempfile.avi'"
    ffmpeg_options += " -ar %s -b 128000 '%s'" % (audiorate, flash_file)
    ffmpeg = "ffmpeg %s >/dev/null 2>&1" % ffmpeg_options
    #print ffmpeg.encode('utf-8')
    os.system(ffmpeg.encode('utf-8'))
  else:
    print "update the cache %s missing" % movie_file.encode('utf-8')
  shutil.rmtree(framedir)
 def extract_frame(movie_file, timestamp, img_folder, width=128, offset = 0, redo = False):
  import warnings
  warnings.filterwarnings("ignore", "tempnam")
  ext = movie_file.split('.')[-1]
  if ext in ('sub', 'srt'):
    print "this is not a movie file, will not try to extract frames"
    return
  framedir = os.tempnam()
  os.mkdir(framedir)
  os.chdir(framedir)
  if offset:
    timestamp_in_file = shift_time(-offset, timestamp)
  else:
    timestamp_in_file = timestamp
  if os.path.exists(movie_file):
    mplayer_options = ''
    mplayer_options += " '%s'" % movie_file
    mplayer_options += " -ss '%s' -frames 2" % (timestamp_in_file)
    mplayer_options += " -vo jpeg:quality=90 -vf scale -zoom -xy %d " % width
    mplayer_options += " -ao null"
    mplayer = "mplayer %s >/dev/null 2>&1" % mplayer_options
    frame = os.path.join(img_folder, "%s.%s" % (timestamp.replace(':', '.'), img_extension))
    if redo or not os.path.exists(frame):
      print mplayer.encode('utf-8')
      os.system (mplayer.encode('utf-8'))
      files = os.listdir(framedir)
      if files:
        print "creating frame ", frame
        shutil.move(os.path.join(framedir,files[-1]), frame)
        if len(files)>1:
          for f in files[:-2]:
            print "unlink", f
            os.unlink(f)
  else:
    print "update the cache %s missing" % movie_file
  shutil.rmtree(framedir)
 def extract_subtitles(movie_file, srt, img_folder, width=128, offset = 0, redo = False):
  """Extract one frame of movie_file for the start time of every subtitle in srt.

  Subtitles are processed in numeric order; img_folder, width, offset and
  redo are handed on to extract_frame.
  """
  subtitles = srt2dict(srt)  
  numbers = sorted([int(k) for k in subtitles.keys()])
  for number in numbers:
    start = subtitles["%s" % number]['start']
    extract_frame(movie_file, start, img_folder, width, offset, redo)
 def detectEncoding(fp):
    """Return the name of the text encoding of the open file fp.

    A byte order mark at the start of the file decides when present;
    otherwise the whole content is handed to chardet, with 'latin-1' as
    the answer when chardet cannot tell. fp has to be opened for reading
    bytes; its position is the same afterwards as it was before. Files
    shorter than 4 bytes are handled (they used to raise ValueError).
    """
    bomDict={ # bytepattern : name
             (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",
             (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
             (0xFE, 0xFF, None, None) : "utf_16_be",
             (0xFF, 0xFE, None, None) : "utf_16_le",
             (0xEF, 0xBB, 0xBF, None) : "utf_8",
            }
    # go to beginning of file and get the first 4 bytes
    oldFP = fp.tell()
    try:
        fp.seek(0)
        head = fp.read(4)
        # slicing gives 1-byte strings on every Python version; short files
        # are padded with None, which is also the wildcard used in bomDict
        codes = [ord(head[i:i + 1]) for i in range(len(head))]
        codes += [None] * (4 - len(codes))
        byte1, byte2, byte3, byte4 = codes
        # try bom detection using 4 bytes, 3 bytes, or 2 bytes
        bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
        if not bomDetection:
            bomDetection = bomDict.get((byte1, byte2, byte3, None))
        if not bomDetection:
            bomDetection = bomDict.get((byte1, byte2, None, None))
        ## if BOM detected, we're done :-)
        if bomDetection:
            return bomDetection
        #more character detecting magick using http://chardet.feedparser.org/
        fp.seek(0)
        rawdata = fp.read()
    finally:
        fp.seek(oldFP)
    encoding = chardet.detect(rawdata)['encoding']
    # chardet reports None when it has no guess
    return encoding or 'latin-1'
 def loadSrt(fname):
  """Return the content of the subtitle file fname as unicode text.

  The encoding is taken from detectEncoding; when decoding with it fails
  the data is decoded as latin-1, which accepts every byte (the former
  "giving up" branch assigned a misspelled name and ended in a
  NameError). A leading byte order mark is removed. The file is read in
  binary mode and closed even when reading fails.
  """
  f = open(fname, 'rb')
  try:
    encoding = detectEncoding(f)
    data = f.read()
  finally:
    f.close()
  try:
    udata = data.decode(encoding)
  except (UnicodeError, LookupError, TypeError):
    # wrong guess, unknown codec name or no encoding at all
    udata = data.decode('latin-1')
  if udata.startswith(u'\ufeff'): 
    udata = udata[1:]
  return udata
--- a/oxdbarchive/templates/init.py
+++ b/oxdbarchive/templates/init.py
--- a/oxdbarchive/tests/init.py
+++ b/oxdbarchive/tests/init.py
--- a/oxdbarchive/tests/test_controllers.py
+++ b/oxdbarchive/tests/test_controllers.py
@ -0,0 +1,32 @@
 import unittest
 import turbogears
 from turbogears import testutil
 from oxdbarchive.controllers import Root
 import cherrypy
 cherrypy.root = Root()
 class TestPages(unittest.TestCase):
    """Page level tests run against the application at cherrypy.root."""
    def setUp(self):
        # start TurboGears before every test ...
        turbogears.startup.startTurboGears()
    def tearDown(self):
        """... and stop it again afterwards: tests for apps using identity
        need to stop CP/TG after each test to stop the VisitManager thread.
        See http://trac.turbogears.org/turbogears/ticket/1217 for details.
        """
        turbogears.startup.stopTurboGears()
    def test_method(self):
        "the index method should return a string called now"
        import types
        page = testutil.call(cherrypy.root.index)
        now = page["now"]
        assert type(now) == types.StringType
    def test_indextitle(self):
        "The indexpage should have the right title"
        testutil.createRequest("/")
        body = cherrypy.response.body[0].lower()
        expected = "<title>welcome to turbogears</title>"
        assert expected in body
--- a/oxdbarchive/tests/test_model.py
+++ b/oxdbarchive/tests/test_model.py
@ -0,0 +1,23 @@
 # If your project uses a database, you can set up database tests
 # similar to what you see below. Be sure to set the db_uri to
 # an appropriate uri for your testing database. sqlite is a good
 # choice for testing, because you can use an in-memory database
 # which is very fast.
 from turbogears import testutil, database
 # from oxdbarchive.model import YourDataClass, User
 # database.set_db_uri("sqlite:///:memory:")
 # class TestUser(testutil.DBTest):
 #     def get_model(self):
 #         return User
 #
 #     def test_creation(self):
 #         "Object creation should set the name"
 #         obj = User(user_name = "creosote",
 #                       email_address = "spam@python.not",
 #                       display_name = "Mr Creosote",
 #                       password = "Wafer-thin Mint")
 #         assert obj.display_name == "Mr Creosote"
--- a/oxdbarchive/timeline.py
+++ b/oxdbarchive/timeline.py
@ -0,0 +1,110 @@
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import Image
 import math
 from StringIO import StringIO
 import oxdb_cache
 from subtitles import srt2dict, time_str2msec
 # Width of one timeline row in pixels. The functions below also use it as the
 # number of seconds shown per row, since second offsets are used directly as
 # x coordinates.
 lineWidth = 600
 # Height in pixels of the timeline strip itself (the name is spelled this way
 # everywhere it is used in this module).
 timlelineHeight = 16
 # Height of a complete row: the strip plus 4 pixels above and 4 below.
 rowHeight = timlelineHeight + 2 * 4 
 def loadTimeline(movie, lines = -1):
  '''
   returns timeline view as a png image (string of png data) for a given movie.

   movie: object with a length attribute in milliseconds
   lines: maximum number of rows to render, -1 renders all rows

   Every row covers lineWidth seconds. A row whose cached timeline image
   can not be loaded is drawn as a plain grey bar instead.
  '''
  length = int(movie.length / 1000)
  # integer division on purpose: number of full rows, plus the last
  # (possibly partial) row
  totalLines = length // lineWidth + 1
  if lines == -1 or totalLines < lines:
    lines = totalLines
  size = (lineWidth, rowHeight * lines)
  timelineColor = (64, 64, 64)
  image = Image.new("RGBA", size)
  for currentLine in range(0, lines):
    offset = currentLine * rowHeight + 4
    try:
      data = oxdb_cache.loadTimeline(movie, "%02d" % (currentLine * 10))
      part = Image.open(StringIO(data)).convert('RGBA')
      box = (0, offset, part.size[0], offset + part.size[1])
      image.paste(part, box)
    except Exception:
      # best effort: any failure to load the cached part falls back to a
      # plain bar, but KeyboardInterrupt/SystemExit are no longer swallowed
      width = lineWidth
      if currentLine == lines - 1:
        # the last row is only as wide as the remaining seconds; it is
        # clamped because the remainder exceeds one row if lines was capped
        width = min(lineWidth, length - (lines - 1) * lineWidth)
      box = (0, offset, width, offset + timlelineHeight)
      image.paste(timelineColor, box)
  output = StringIO()
  image.save(output, 'PNG')
  return output.getvalue()
 def loadTimelineOverlay(movie, query, lines = -1):
  '''
   returns timeline overlay as a png image (string of png data) for a
   given movie. query is used to only highlight scenes matching query

   movie: object with a length attribute in milliseconds and an
          overlay(query) method yielding objects with start/stop time strings
   lines: maximum number of rows to render, -1 renders all rows
  '''
  background = (255,255,255,0)
  marker = (255,255,0,128)
  markerBorder = (255,255,0,255)
  length = int(movie.length / 1000)
  # integer division on purpose: full rows plus the last partial row
  totalLines = length // lineWidth + 1
  if lines == -1 or totalLines < lines:
    lines = totalLines
  size = (lineWidth, rowHeight * lines)
  mask = Image.new("RGBA", size, background)
  for subtitle in movie.overlay(query):
    start = time_str2msec(subtitle.start) // 1000
    stop = time_str2msec(subtitle.stop) // 1000
    if start >= stop:
      continue
    # row index stays an integer so that all box coordinates are integers
    currentLine = start // lineWidth
    if currentLine >= lines:
      # rows that are not part of the image can not be drawn
      continue
    offset = currentLine * rowHeight + 4
    start = start - currentLine * lineWidth
    stop = stop - currentLine * lineWidth
    # translucent marker with an opaque line above and below
    mask.paste(marker, (start, offset - 1, stop, offset + timlelineHeight + 1))
    mask.paste(markerBorder, (start, offset - 1, stop, offset))
    mask.paste(markerBorder,
               (start, offset + timlelineHeight, stop, offset + timlelineHeight + 1))
  output = StringIO()
  mask.save(output, 'PNG', quality=70)
  return output.getvalue()
 def loadTimelineImageMap(movie):
  '''
   returns the html of an image map with one area per subtitle of the
   given movie; each area carries mouse over/click/out handlers.
  '''
  subtitles = movie.subtitleDict
  # not used below, kept so that movie.length is still read as before
  length = int(movie.length / 1000)
  parts = ['<map name="timelineImageMap">']
  for key in sorted([int(k) for k in subtitles.keys()]):
    entry = subtitles["%s" % key]
    begin = int(round(time_str2msec(entry['start']) / 1000))
    end = int(round(time_str2msec(entry['stop']) / 1000))
    if not begin < end:
      continue
    row = math.ceil(begin / lineWidth)
    top = int(row * rowHeight + 4)
    left = int(begin - (row * lineWidth))
    right = int(end - (row * lineWidth))
    coords = (left, top - 1, right, top + timlelineHeight + 1)
    handlerArgs = (left, right, top, key)
    parts.append('<area class="timelineMarker" shape="rect" coords="%s, %s, %s, %s"' % coords)
    parts.append(' onMouseOver="iS(%s, %s, %s, %s)" ' % handlerArgs)
    parts.append(' onClick="cS(%s, %s, %s, %s)" ' % handlerArgs)
    parts.append(' onMouseOut="oS()" />')
  parts.append("</map>")
  return ''.join(parts)
--- a/oxdbarchive/tools/extract_clip.py
+++ b/oxdbarchive/tools/extract_clip.py
@ -0,0 +1,392 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 #
 import os
 import time
 import gobject
 gobject.threads_init()
 import pygst
 pygst.require("0.10")
 import gst
 import Image
 import singledecodebin
 class FirstFrame:
  '''
   Grabs a single video frame from a file through a GStreamer pipeline
   and saves it with PIL, scaled to the requested width.
  '''
  # True while png() waits for the handoff callback to store a frame
  getShot = False
  # duration as returned by the duration query in __init__ (0 if unknown)
  length = 0
  # output height computed by scaleto() (0 until a frame was scaled)
  height = 0
  def __init__(self, videofile, png='', nseconds=-1, width=128):
    '''
     Builds the decoding pipeline for videofile, pauses it and queries
     the duration. If png is given and nseconds is greater than -1 the
     frame is grabbed right away.
     NOTE(review): nseconds is passed to the seek event unchanged with
     gst.FORMAT_TIME, so it is presumably expected in GStreamer time
     units rather than seconds - confirm against callers.
    '''
    s = ''' filesrc name=input 
          ! decodebin name=dbin 
          ! queue name =q 
          ! ffmpegcolorspace ! video/x-raw-rgb 
          ! fakesink name=output signal-handoffs=true
        '''
    self.width = width
    self.pipeline = gst.parse_launch(s)
    self.input  = self.pipeline.get_by_name('input')
    self.fakesink = self.pipeline.get_by_name('output')
    self.dbin = self.pipeline.get_by_name('dbin')
    self.bus = self.pipeline.get_bus()
    self.input.props.location = videofile
    self.pipeline.set_state(gst.STATE_PAUSED)
    # get_state() is called after each state change before continuing
    self.pipeline.get_state()
    #length
    queue = self.pipeline.get_by_name('q')
    pads = queue.sink_pads()
    q = gst.query_new_duration(gst.FORMAT_TIME)
    # the duration query is sent to the peer of each sink pad of the queue
    for pad in pads:
      if pad.get_peer() and pad.get_peer().query(q):
        format, self.length = q.parse_duration()
    if nseconds>-1 and png:
      self.png(png, nseconds)
  def close(self):
    '''Sets the pipeline to the NULL state.'''
    self.pipeline.set_state(gst.STATE_NULL)
    self.pipeline.get_state()
  def seek(self, nseconds):
    '''
     Sends a flushing, accurate seek to position nseconds to the sink.
     A position beyond a known length is replaced by 0. A failed seek is
     only logged.
    '''
    if(self.length and self.length < nseconds):
      nseconds = 0
    event = gst.event_new_seek(1.0, gst.FORMAT_TIME,
            gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
            gst.SEEK_TYPE_SET, nseconds,
            gst.SEEK_TYPE_NONE, 0)
    res = self.fakesink.send_event(event)
    if res:
      self.pipeline.set_new_stream_time(0L)
    else:
        gst.error("seek to %r failed" % nseconds)
  def png(self, png, nseconds):
    '''
     Seeks to nseconds and saves the next frame handed to the sink to the
     file named png.
    '''
    self.png_frame = png
    self.pipeline.set_state(gst.STATE_PAUSED)
    self.pipeline.get_state()
    self.seek(nseconds)
    self.pipeline.set_state(gst.STATE_PLAYING)
    self.pipeline.get_state()
    ho = self.fakesink.connect("handoff", self.snapshot_png_handoff_cb)
    self.getShot = True
    # wait until the callback cleared getShot; the loop is also left as
    # soon as a poll with a timeout of one second returns no message
    while self.getShot:
      msg = self.bus.poll(gst.MESSAGE_ANY, gst.SECOND)
      if not msg:
        break
    self.fakesink.disconnect(ho)
  def snapshot_png_handoff_cb(self, sink, buffer, pad):
    '''
     Handoff callback of the sink: while getShot is set, the buffer is
     turned into an RGB image, resized and saved to self.png_frame.
    '''
    if self.getShot:
      caps = sink.sink_pads().next().get_negotiated_caps()
      # frame size is read from the negotiated caps of the sink pad
      for s in caps:
        input_d = (s['width'], s['height'])
        output_d = self.scaleto(s['width'], s['height'])
      img = Image.fromstring('RGB',input_d,buffer)
      img = img.resize(output_d, Image.ANTIALIAS)
      # PIL derives the file format from the extension of the file name
      img.save(self.png_frame)
      self.getShot=False
  def scaleto(self, width, height):
    '''
     Returns (self.width, height), with height scaled to keep the aspect
     ratio of width x height and rounded down to an even number. The
     height is also stored in self.height.
    '''
    height = int(self.width / (float(width) / height))
    height = height - height % 2
    self.height = height
    return (self.width, height)
 def file_gnl_src(name, uri, caps, start, duration, offset, priority):
  '''
   returns a gnlsource element called name that wraps a SingleDecodeBin
   for uri/caps; duration is used for both duration and media_duration,
   offset for media_start.
  '''
  decoder = singledecodebin.SingleDecodeBin(caps, uri)
  source = gst.element_factory_make('gnlsource', name)
  # same properties in the same order as individual assignments
  settings = (
    ('start', start),
    ('duration', duration),
    ('media_start', offset),
    ('media_duration', duration),
    ('priority', priority),
  )
  for prop, value in settings:
    setattr(source.props, prop, value)
  source.add(decoder)
  return source
 class VideoEncoder:
  '''
   gst.Bin (available as self.bin) that takes raw video on its 'sink'
   ghost pad and delivers encoded video on its 'src' ghost pad.
  '''
  def __init__(self, output= 'ogg', width = 0, height = 0):
    '''
     output: 'flv' selects ffenc_flv, 'mp4' selects x264enc, anything
             else theoraenc
     width/height: if width is set the video is scaled to it (and to
                   height, if set) with a pixel aspect ratio of 1/1
    '''
    self.bin = gst.Bin('VideoEncoder')
    # input queue is only limited by its number of buffers
    self._queue_in = gst.element_factory_make("queue")
    self._queue_in.set_property("max-size-buffers",500)
    self._queue_in.set_property("max-size-time",0)
    self._queue_in.set_property("max-size-bytes",0)
    self._queue_out = gst.element_factory_make("queue")
    self._queue_out.set_property("max-size-buffers",500)
    self._identity = gst.element_factory_make("identity")
    self._identity.set_property('single-segment', True)
    self._ffmpegcolorspace = gst.element_factory_make("ffmpegcolorspace")
    self._videorate = gst.element_factory_make("videorate")
    self._videoscale = gst.element_factory_make("videoscale")
    # NOTE(review): method 1 is presumably bilinear scaling - confirm
    self._videoscale.props.method = 1
    if output == 'flv':
      self.encoder = gst.element_factory_make("ffenc_flv")
      self.encoder.set_property('bitrate', 200000)
    elif output == 'mp4':
      self.encoder = gst.element_factory_make("x264enc")
      self.encoder.set_property('bitrate', 200)
    else:
      self.encoder = gst.element_factory_make("theoraenc")
      self.encoder.set_property("bitrate", 2000)
      self.encoder.set_property('sharpness', 1)
    self.bin.add(
                 self._queue_in,
                 self._identity,
                 self._ffmpegcolorspace,
                 self._videorate,
                 self._videoscale,
                 self.encoder,
                 self._queue_out
                )
    # chain: queue_in -> videorate -> identity -> ffmpegcolorspace
    #        -> videoscale -> encoder -> queue_out
    self._queue_in.link(self._videorate)
    # the link after videorate is restricted to 25 frames per second
    fps = "25/1"
    caps = "video/x-raw-yuv, framerate=(fraction)%s" % fps
    gst_caps = gst.caps_from_string(caps)
    self._videorate.link(self._identity, gst_caps)
    self._identity.link(self._ffmpegcolorspace)
    self._ffmpegcolorspace.link(self._videoscale)
    if width:
      # the link to the encoder is restricted to the requested size
      caps = "video/x-raw-yuv, width=%d, " % width
      if height:
        caps += "height=%d," % height
      caps += "pixel-aspect-ratio=(fraction)1/1"
      gst_caps = gst.caps_from_string(caps)
      self._videoscale.link(self.encoder, gst_caps)
    else:
      self._videoscale.link(self.encoder)
    self.encoder.link(self._queue_out)
    # Create GhostPads
    self.bin.add_pad(gst.GhostPad('sink', self._queue_in.get_pad('sink')))
    self.bin.add_pad(gst.GhostPad('src', self._queue_out.get_pad('src')))
 class AudioEncoder:
  '''
   gst.Bin (available as self.bin) that takes raw audio on its 'sink'
   ghost pad and delivers encoded audio on its 'src' ghost pad.
  '''
  def __init__(self, output = 'ogg', samplerate = 44100, channels = 2):
    '''
     output: 'flv' selects lame followed by mp3parse, 'mp4' selects faac,
             anything else vorbisenc
     samplerate/channels: rate and channels of the caps used on the link
                          between audioresample and the encoder
    '''
    self.bin = gst.Bin('AudioEncoder')
    # input queue is only limited by its number of buffers
    self._queue_in = gst.element_factory_make("queue")
    self._queue_in.set_property("max-size-buffers",500)
    self._queue_in.set_property("max-size-time",0)
    self._queue_in.set_property("max-size-bytes",0)
    self._queue_out = gst.element_factory_make("queue")
    self._queue_out.set_property("max-size-buffers",500)
    self._audiorate = gst.element_factory_make("audiorate")
    self._identity = gst.element_factory_make("identity")
    self._identity.set_property('single-segment', True)
    self._audioconvert = gst.element_factory_make("audioconvert")
    self._audioresample = gst.element_factory_make("audioresample")
    if output == 'flv':
      #MP3
      self.encoder = gst.element_factory_make("lame")
      self.encoder.set_property("bitrate", 32)
      # mp3parse only exists for flv output, so it is added to the bin here
      self._mp3parse = gst.element_factory_make("mp3parse")
      self.bin.add(self._mp3parse)
      caps = "audio/x-raw-int,rate=%d,channels=%d" % (samplerate, channels)
    elif output == 'mp4':
      #AAC
      self.encoder = gst.element_factory_make("faac")
      #self.encoder.set_property("bitrate", 32)
      caps = "audio/x-raw-int,rate=%d,channels=%d" % (samplerate, channels)
    else:
      #Vorbis
      self.encoder = gst.element_factory_make("vorbisenc")
      self.encoder.set_property("quality", 0)
      caps = "audio/x-raw-float,rate=%d,channels=%d" % (samplerate, channels)
    self.bin.add(
                 self._queue_in,
                 self._audiorate,
                 self._identity,
                 self._audioconvert,
                 self._audioresample,
                 self.encoder,
                 self._queue_out
                )
    # chain: queue_in -> audiorate -> identity -> audioconvert
    #        -> audioresample -> encoder [-> mp3parse] -> queue_out
    self._queue_in.link(self._audiorate)
    self._audiorate.link(self._identity)
    self._identity.link(self._audioconvert)
    self._audioconvert.link(self._audioresample)
    # the link to the encoder is restricted to the caps selected above
    arate = gst.caps_from_string(caps)
    self._audioresample.link(self.encoder, arate)
    if output == 'flv':
      self.encoder.link(self._mp3parse)
      self._mp3parse.link(self._queue_out)
    else:
      self.encoder.link(self._queue_out)
    # Create GhostPads
    self.bin.add_pad(gst.GhostPad('sink', self._queue_in.get_pad('sink')))
    self.bin.add_pad(gst.GhostPad('src', self._queue_out.get_pad('src')))
 class ExtractClip:
  '''
   Extracts a part of a media file and encodes it into a new file.

   The container and the encoders are selected by the extension of the
   output file: flv, mp4, anything else results in ogg. extract() blocks
   until the clip was written or the pipeline reported an error; in the
   latter case the error is available as self.error afterwards.
  '''
  acaps = gst.caps_from_string("audio/x-raw-int;audio/x-raw-float")
  vcaps = gst.caps_from_string("video/x-raw-rgb; video/x-raw-yuv")  
  width = 0
  samplerate = 44100
  channels = 2
  # error reported on the bus while extracting, None if there was none
  error = None
  def __init__(self, location, outfile, offset, duration, width, height, samplerate, channels):
    '''
     location: path of the input file
     outfile: path of the output file, its extension selects the format
     offset/duration: position and length of the clip; both are passed on
                      to the gnlsource properties unchanged
     width/height: size of the encoded video
     samplerate/channels: format of the encoded audio
    '''
    self.mainloop = gobject.MainLoop()
    if outfile.endswith('flv'):
      self.outputformat = 'flv'
    elif outfile.endswith('mp4'):
      self.outputformat = 'mp4'
    else:
      self.outputformat = 'ogg'
    self.width = width
    self.height = height
    self.samplerate = samplerate
    self.channels = channels
    self._pipeline = gst.Pipeline()
    self.bus = self._pipeline.get_bus()
    self.bus.add_signal_watch()
    self.watch_id = self.bus.connect("message", self.onBusMessage)
    # one composition per stream; their pads show up later and are linked
    # to the encoders in the pad-added callbacks
    self.atimeline = gst.element_factory_make("gnlcomposition", "ATimeline")
    self.atimeline.connect("pad-added", self._on_new_apad)
    self._pipeline.add(self.atimeline)
    self.vtimeline = gst.element_factory_make("gnlcomposition", "VTimeline")
    self.vtimeline.connect("pad-added", self._on_new_vpad)
    self._pipeline.add(self.vtimeline)
    #Mux streams
    if self.outputformat == 'flv':
      self._mux =  gst.element_factory_make("ffmux_flv")
    elif self.outputformat == 'mp4':
      self._mux =  gst.element_factory_make("ffmux_mp4")
    else:
      self._mux =  gst.element_factory_make("oggmux")
    self._pipeline.add(self._mux)
    location = "file://" + os.path.abspath(location)
    self._filesink = gst.element_factory_make("filesink")
    self._filesink.set_property("location", outfile)
    self._pipeline.add(self._filesink)
    self._mux.link(self._filesink)
    media_start = offset
    media_duration = duration
    start = 0 * gst.SECOND
    name = os.path.basename(location)
    #video source, one frame (at 25 fps) longer than the audio source
    vsrc = file_gnl_src("%s_v" % name, 
                        location, self.vcaps, start, 
                        media_duration + gst.SECOND / 25, media_start, 0)
    self.vtimeline.add(vsrc)
    #audio source
    asrc = file_gnl_src("%s_a" % name, 
                        location, self.acaps, start, 
                        media_duration, media_start, 0)
    self.atimeline.add(asrc)
    self.endpos = media_start + media_duration
    self.vencoder = VideoEncoder(self.outputformat, self.width, self.height)
    self._pipeline.add(self.vencoder.bin)
    self.vencoder.bin.link(self._mux)
    self.aencoder = AudioEncoder(self.outputformat, self.samplerate, self.channels)
    self._pipeline.add(self.aencoder.bin)
    self.aencoder.bin.link(self._mux)
  def extract(self):
    '''Runs the pipeline and blocks in the main loop until quit() is called.'''
    self._pipeline.set_state(gst.STATE_PAUSED)
    self._pipeline.get_state()
    self._pipeline.set_state(gst.STATE_PLAYING)
    self._pipeline.get_state()
    self.mainloop.run()
  def onBusMessage(self, bus, message):
    '''Bus callback: leaves the main loop at end of stream or on an error.'''
    if message.type == gst.MESSAGE_ERROR:
      # no EOS follows an error, without this branch extract() would
      # block forever if the pipeline fails
      err, debug = message.parse_error()
      self.error = err
      gst.error("extracting clip failed: %s (%s)" % (err, debug))
      self.quit()
    elif message.src == self._pipeline and message.type == gst.MESSAGE_EOS:
      self.quit()
    return True
  def quit(self):
    '''Sets the pipeline to the NULL state and stops the main loop.'''
    self._pipeline.set_state(gst.STATE_NULL)
    self._pipeline.get_state()
    self.mainloop.quit()
  def _on_new_vpad(self, element, pad):
    # new pad of the video composition feeds the video encoder
    pad.link(self.vencoder.bin.get_pad("sink"))
  def _on_new_apad(self, element, pad):
    # new pad of the audio composition feeds the audio encoder
    pad.link(self.aencoder.bin.get_pad("sink"))
 if __name__ == "__main__":
  # usage: extract_clip.py input_file output_file offset duration
  # offset and duration are given in milliseconds
  import sys
  if len(sys.argv) < 5:
    sys.stderr.write("usage: %s input_file output_file offset_msec duration_msec\n" % sys.argv[0])
    sys.exit(1)
  width = 128
  height = 80
  samplerate = 44100
  channels = 1
  inputFile = sys.argv[1]
  outputFile = sys.argv[2]
  offset = int(float(sys.argv[3]) * gst.MSECOND)
  duration = int(float(sys.argv[4]) * gst.MSECOND)
  # a still of the first frame of the clip is saved next to the output file
  frameName = "%s.jpg" % os.path.splitext(outputFile)[0]
  f = FirstFrame(inputFile, frameName, offset, width)
  # the clip gets the height that was computed for the still
  height = f.height
  # shut the still pipeline down before the input is opened a second time
  f.close()
  clip = ExtractClip(inputFile, outputFile, offset, duration, width, height, samplerate, channels)
  clip.extract()
--- a/oxdbarchive/tools/extract_timeline.py
+++ b/oxdbarchive/tools/extract_timeline.py
@ -0,0 +1,121 @@
 #!/usr/bin/env python
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import gobject
 gobject.threads_init()
 import pygst
 pygst.require('0.10')
 import gst
 import Image
 import sys, os, shutil, time
 from glob import glob
 import math
 # Length in seconds of one timeline part (10 minutes); GstTimeline also uses
 # it as the maximum width in pixels of the scaled-down part.
 bar_part_time = 60 * 10
 # Height in pixels of the timeline images.
 bar_height = 16
 class GstTimeline:
  def __init__(self, timeline_png):
    self.timeline_png = timeline_png
    self.bar = None
    self.frames = 0
    self.length = 0
    self.number_of_frames = 0
  def addVideo(self, video_file):
    s = ''' filesrc name=input 
        ! decodebin name=dbin 
        ! queue name =q 
        ! ffmpegcolorspace ! video/x-raw-rgb 
        ! fakesink name=output signal-handoffs=true
      '''
    self.pipeline = gst.parse_launch(s)
    self.input  = self.pipeline.get_by_name('input')
    self.fakesink = self.pipeline.get_by_name('output')
    self.dbin = self.pipeline.get_by_name('dbin')
    self.bus = self.pipeline.get_bus()
    self.input.set_property("location", video_file)
    self.pipeline.set_state(gst.STATE_PAUSED)
    self.pipeline.get_state()
    #length
    queue = self.pipeline.get_by_name('q')
    pads = queue.sink_pads()
    q = gst.query_new_duration(gst.FORMAT_TIME)
    for pad in pads:
      if pad.get_peer() and pad.get_peer().query(q):
        format, self.length = q.parse_duration()
    seconds = int(math.ceil(float(self.length) / gst.SECOND))
    print "seconds", seconds
    self.timelineImage = Image.new("RGB", (seconds, bar_height))
    self.timelineImagePos = 0
    #extract frames
    ho = self.fakesink.connect ("handoff", self.snapshot_png_handoff_cb)
    self.pipeline.set_state(gst.STATE_PLAYING)
    self.pipeline.get_state()
    while 1:
      msg = self.bus.poll(gst.MESSAGE_EOS | gst.MESSAGE_ERROR, gst.SECOND)
      if msg:
        break
    self.fakesink.disconnect(ho)
    self.pipeline.set_state(gst.STATE_NULL)
  def cropAndSave(self):
    if self.frames:
      mini_width = int(self.frames / self.fps)
      mini_width = min(mini_width, bar_part_time)
      #crop if segment is shorter
      c = self.bar.crop((0, 0, self.frames,bar_height))
      #resize to 1 pixel per second
      mini = c.resize((mini_width,bar_height), Image.ANTIALIAS)
      #add to timeline png
      self.timelineImage.paste(mini, (self.timelineImagePos, 0))
      self.timelineImagePos += mini_width
    self.frames = 0
  def close(self):
    self.cropAndSave()
    self.timelineImage.save(self.timeline_png)
  def snapshot_png_handoff_cb(self, sink, gst_buffer, pad):
    caps = sink.sink_pads().next().get_negotiated_caps()
    for s in caps:
      input_d = (s['width'], s['height'])
      framerate = s['framerate']
      if not self.bar:
        self.fps = float(framerate.num) / framerate.denom
        self.width_part = int(self.fps * bar_part_time)
        self.bar = Image.new("RGB", (self.width_part + 1 ,bar_height))
    img = Image.fromstring('RGB',input_d, gst_buffer)
    height = 16
    img = img.resize((1, height), Image.ANTIALIAS)
    self.frames += 1
    for i in range(height):
      self.bar.putpixel((self.frames, i), img.getpixel((0,i)))
    if self.frames >= self.width_part and self.frames > 30:
      self.cropAndSave()
 def usage():
  print ""
  print "usage: %s output_base_path video_file" % sys.argv[0]
  print ""
  sys.exit(1)
 if __name__ == "__main__":
  if len(sys.argv) < 2:
    usage()
  if not os.path.exists(sys.argv[1]):
    print "target does not exist"
    sys.exit(1)
  g = GstTimeline(sys.argv[1])
  g.addVideo(sys.argv[2])
  g.close()
--- a/oxdbarchive/tools/shift_srt.py
+++ b/oxdbarchive/tools/shift_srt.py
@ -0,0 +1,89 @@
 #!/usr/bin/env python
 # -*- Mode: Python; -*-
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import sys
 import re
 import os
 def srt2dict(srt, encoding = "latin-1"):
  '''convert srt string into a dict in the form
     dict(num = dict(start, stop, text))

     srt: contents of an srt file, either as byte string in the given
          encoding or as already decoded text
     encoding: used to decode the subtitle text of byte strings

     Keys are the subtitle numbers as strings, start and stop are the
     time strings as found in the file. Blocks with less than three lines
     are skipped. Raises ValueError if a block does not start with a
     number and IndexError if its second line has no ' --> '.
  '''
  subdict = {}
  srt = srt.replace('\r', '').strip()
  for block in srt.split('\n\n'):
    lines = block.strip().split('\n')
    if len(lines) < 3:
      continue
    start_stop = lines[1].split(' --> ')
    num = "%s" % int(lines[0])
    text = '\n'.join(lines[2:])
    # only byte strings get decoded; decoding text that is already
    # decoded used to fail with a TypeError
    if not isinstance(text, type(u'')):
      text = text.decode(encoding)
    subdict[num] = {
      'start': start_stop[0],
      'stop': start_stop[1],
      'text': text,
      }
  return subdict
 def dict2srt(subtitles, encoding = "latin-1"):
  '''build the contents of an srt file from a dict in the form
     dict(num = dict(start, stop, text))
     subtitles are written in numeric order of their keys, the result
     is encoded with the given encoding
  '''
  entries = []
  for num in sorted([int(k) for k in subtitles.keys()]):
    item = subtitles["%s" % num]
    entries.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
      num, item['start'], item['stop'], item['text']))
  return ''.join(entries).strip().encode(encoding)
 def time_str2msec(time_string):
  ''' return the milliseconds of a time in the form 01:50:52,123

      The last three characters are the milliseconds, the character in
      front of them is skipped as separator. Raises ValueError if
      time_string does not have this form.

      Only integers are used: going through float timestamps and cutting
      off the result lost a millisecond for many values
      (00:00:01,001 became 1000).
  '''
  msec = int(time_string[-3:])
  hours, minutes, seconds = time_string[:-4].split(':')
  return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + msec
 def msec2time_str(msec):
  ''' return msec milliseconds as time in the form 01:50:52,123

      The milliseconds are always written with three digits; cutting them
      out of the decimal representation of msec turned 5 into ",5"
      instead of ",005".
  '''
  import time
  seconds, fraction = divmod(int(msec), 1000)
  return time.strftime("%H:%M:%S", time.gmtime(seconds)) + ",%03d" % fraction
 def shift_time(offset, time_string):
  ''' return time_string moved by offset milliseconds
      time_string is expected to be in the form 01:50:52,123
  '''
  original_msec = time_str2msec(time_string)
  shifted_msec = original_msec + offset
  return msec2time_str(shifted_msec)
 def shift_subtitles(offset, offset_num, subtitles):
  '''
    returns a copy of subtitles (in the form dict(num = dict(start, stop,
    text))) with start and stop of every subtitle shifted by offset
    milliseconds and every number increased by offset_num

    subtitles itself is left untouched: the entries are copied before they
    are changed, previously the times of the passed in entries were
    overwritten as well.
  '''
  sdict = {}
  for k in sorted([int(k) for k in subtitles.keys()]):
    entry = dict(subtitles["%s" % k])
    entry['start'] = shift_time(offset, entry['start'])
    entry['stop'] = shift_time(offset, entry['stop'])
    sdict["%s" % (k + offset_num)] = entry
  return sdict
 if __name__ == '__main__':
  srt = open(sys.argv[1]).read()
  srtd = srt2dict(srt)
  offset = int(sys.argv[2])
  srtd = shift_subtitles(offset, 0, srtd)
  srt = dict2srt(srtd)
  print srt
--- a/oxdbarchive/tools/singledecodebin.py
+++ b/oxdbarchive/tools/singledecodebin.py
@ -0,0 +1,305 @@
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=4:sts=4:ts=4
 #
 #       pitivi/elements/singledecodebin.py
 #
 # Copyright (c) 2005, Edward Hervey <bilboed@bilboed.com>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with this program; if not, write to the
 # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 # Boston, MA 02111-1307, USA.
 """
 Single-stream queue-less decodebin
 """
 import gobject
 import gst
 def is_raw(caps):
    """ returns True if the string form of caps starts with a RAW media type """
    description = caps.to_string()
    raw_prefixes = ("video/x-raw", "audio/x-raw", "text/plain", "text/x-pango-markup")
    matching = [prefix for prefix in raw_prefixes if description.startswith(prefix)]
    return len(matching) > 0
 class SingleDecodeBin(gst.Bin):
    """
    Bin that decodes a single stream: starting at a typefind element it
    plugs demuxers, decoders and parsers until a pad with the wanted caps
    shows up, which is then exposed as the "src" ghost pad.
    """
    __gsttemplates__ = (
        gst.PadTemplate ("sinkpadtemplate",
                         gst.PAD_SINK,
                         gst.PAD_ALWAYS,
                         gst.caps_new_any()),
        gst.PadTemplate ("srcpadtemplate",
                         gst.PAD_SRC,
                         gst.PAD_SOMETIMES,
                         gst.caps_new_any())
        )
    def __init__(self, caps=None, uri=None, *args, **kwargs):
        """
        caps: caps of the stream to expose, any caps if not given
        uri: if given and valid, a source element for it is created inside
             the bin; otherwise the bin gets a "sink" ghost pad
        """
        gst.Bin.__init__(self, *args, **kwargs)
        if not caps:
            caps = gst.caps_new_any()
        self.caps = caps
        self.typefind = gst.element_factory_make("typefind", "internal-typefind")
        self.add(self.typefind)
        self.uri = uri
        if self.uri and gst.uri_is_valid(self.uri):
            self.urisrc = gst.element_make_from_uri(gst.URI_SRC, uri, "urisrc")
            self.log("created urisrc %s / %r" % (self.urisrc.get_name(),
                                                 self.urisrc))
            self.add(self.urisrc)
            self.urisrc.link(self.typefind)
        else:
            self._sinkpad = gst.GhostPad("sink", self.typefind.get_pad("sink"))
            self._sinkpad.set_active(True)
            self.add_pad(self._sinkpad)
        self.typefind.connect("have_type", self._typefindHaveTypeCb)
        self._srcpad = None
        self._dynamics = []
        self._validelements = [] #added elements
        self._factories = self._getSortedFactoryList()
    ## internal methods
    def _controlDynamicElement(self, element):
        """
        Remember element and continue autoplugging when it adds pads.
        """
        self.log("element:%s" % element.get_name())
        self._dynamics.append(element)
        element.connect("pad-added", self._dynamicPadAddedCb)
        element.connect("no-more-pads", self._dynamicNoMorePadsCb)
    def _getSortedFactoryList(self):
        """
        Returns the list of demuxers, decoders and parsers available, sorted
        by rank
        """
        def myfilter(fact):
            if fact.get_rank() < 64 :
                return False
            klass = fact.get_klass()
            if not ("Demuxer" in klass or "Decoder" in klass or "Parse" in klass):
                return False
            return True
        reg = gst.registry_get_default()
        res = [x for x in reg.get_feature_list(gst.ElementFactory) if myfilter(x)]
        res.sort(lambda a, b: int(b.get_rank() - a.get_rank()))
        return res
    def _findCompatibleFactory(self, caps):
        """
        Returns a list of factories (sorted by rank) which can take caps as
        input. Returns empty list if none are compatible
        """
        self.debug("caps:%s" % caps.to_string())
        res = []
        for factory in self._factories:
            for template in factory.get_static_pad_templates():
                if template.direction == gst.PAD_SINK:
                    intersect = caps.intersect(template.static_caps.get())
                    if not intersect.is_empty():
                        res.append(factory)
                        break
        self.debug("returning %r" % res)
        return res
    def _closeLink(self, element):
        """
        Inspects element and tries to connect something on the srcpads.
        If there are dynamic pads, it sets up a signal handler to
        continue autoplugging when they become available.
        """
        to_connect = []
        dynamic = False
        templates = element.get_pad_template_list()
        for template in templates:
            if not template.direction == gst.PAD_SRC:
                continue
            if template.presence == gst.PAD_ALWAYS:
                pad = element.get_pad(template.name_template)
                to_connect.append(pad)
            elif template.presence == gst.PAD_SOMETIMES:
                pad = element.get_pad(template.name_template)
                if pad:
                    to_connect.append(pad)
                else:
                    dynamic = True
            else:
                # the name comes from the template: there is no pad for a
                # request template, "pad" is unset or left over from an
                # earlier template at this point
                self.log("Template %s is a request pad, ignoring" % template.name_template)
        if dynamic:
            self.debug("%s is a dynamic element" % element.get_name())
            self._controlDynamicElement(element)
        for pad in to_connect:
            self._closePadLink(element, pad, pad.get_caps())
    def _tryToLink1(self, source, pad, factories):
        """
        Tries to link one of the factories' element to the given pad.
        Returns the element that was successfully linked to the pad.
        """
        self.debug("source:%s, pad:%s , factories:%r" % (source.get_name(),
                                                         pad.get_name(),
                                                         factories))
        result = None
        for factory in factories:
            element = factory.create()
            if not element:
                self.warning("weren't able to create element from %r" % factory)
                continue
            sinkpad = element.get_pad("sink")
            if not sinkpad:
                continue
            self.add(element)
            try:
                pad.link(sinkpad)
            except:
                # linking failed: drop this element and try the next factory
                element.set_state(gst.STATE_NULL)
                self.remove(element)
                continue
            self._closeLink(element)
            element.set_state(gst.STATE_PAUSED)
            result = element
            break
        return result
    def _closePadLink(self, element, pad, caps):
        """
        Finds the list of elements that could connect to the pad.
        If the pad has the desired caps, it will create a ghostpad.
        If no compatible elements could be found, the search will stop.
        """
        self.debug("element:%s, pad:%s, caps:%s" % (element.get_name(),
                                                    pad.get_name(),
                                                    caps.to_string()))
        if caps.is_empty():
            self.log("unknown type")
            return
        if caps.is_any():
            self.log("type is not know yet, waiting")
            return
        if caps.intersect(self.caps):
            # This is the desired caps
            if not self._srcpad:
                self._wrapUp(element, pad)
        elif is_raw(caps):
            self.log("We hit a raw caps which isn't the wanted one")
            # FIXME : recursively remove everything until demux/typefind
        else:
            # Find something
            if len(caps) > 1:
                self.log("many possible types, delaying")
                return
            facts = self._findCompatibleFactory(caps)
            if not facts:
                self.log("unknown type")
                return
            self._tryToLink1(element, pad, facts)
    def _wrapUp(self, element, pad):
        """
        Ghost the given pad of element.
        Remove non-used elements.
        """
        if self._srcpad:
            return
        self._markValidElements(element)
        self._removeUnusedElements(self.typefind)
        # get_name has to be called, otherwise the method itself is logged
        self.log("ghosting pad %s" % pad.get_name())
        self._srcpad = gst.GhostPad("src", pad)
        self._srcpad.set_active(True)
        self.add_pad(self._srcpad)
        self.post_message(gst.message_new_state_dirty(self))
    def _markValidElements(self, element):
        """
        Mark this element and upstreams as valid
        """
        self.log("element:%s" % element.get_name())
        if element == self.typefind:
            return
        self._validelements.append(element)
        # find upstream element
        pad = list(element.sink_pads())[0]
        parent = pad.get_peer().get_parent()
        self._markValidElements(parent)
    def _removeUnusedElements(self, element):
        """
        Remove unused elements connected to srcpad(s) of element
        """
        self.log("element:%s" % element)
        for pad in element.src_pads():
            if pad.is_linked():
                peer = pad.get_peer().get_parent()
                self._removeUnusedElements(peer)
                if not peer in self._validelements:
                    self.log("removing %s" % peer.get_name())
                    pad.unlink(pad.get_peer())
                    peer.set_state(gst.STATE_NULL)
                    self.remove(peer)
    def _cleanUp(self):
        """
        Remove the src ghost pad and all elements that were marked valid.
        """
        self.log("")
        if self._srcpad:
            self.remove_pad(self._srcpad)
        self._srcpad = None
        for element in self._validelements:
            element.set_state(gst.STATE_NULL)
            self.remove(element)
        self._validelements = []
    ## Overrides
    def do_change_state(self, transition):
        """
        Let gst.Bin handle the transition, clean up when going down to
        READY or NULL.
        """
        self.debug("transition:%r" % transition)
        res = gst.Bin.do_change_state(self, transition)
        if transition in [gst.STATE_CHANGE_PAUSED_TO_READY, gst.STATE_CHANGE_READY_TO_NULL]:
            self._cleanUp()
        return res
    ## Signal callbacks
    def _typefindHaveTypeCb(self, typefind, probability, caps):
        """
        typefind found the type of the stream: start plugging at its src pad.
        """
        self.debug("probability:%d, caps:%s" % (probability, caps.to_string()))
        self._closePadLink(typefind, typefind.get_pad("src"), caps)
    ## Dynamic element Callbacks
    def _dynamicPadAddedCb(self, element, pad):
        """
        A dynamic element added a pad: continue plugging unless the src
        pad already exists.
        """
        self.log("element:%s, pad:%s" % (element.get_name(), pad.get_name()))
        if not self._srcpad:
            self._closePadLink(element, pad, pad.get_caps())
    def _dynamicNoMorePadsCb(self, element):
        self.log("element:%s" % element.get_name())
 gobject.type_register(SingleDecodeBin)
--- a/oxdbarchive/tools/subtitles.py
+++ b/oxdbarchive/tools/subtitles.py
@ -0,0 +1,139 @@
 #!/usr/bin/env python
 # depends on 
 # subtitleripper - http://subtitleripper.sourceforge.net
 # unrar
 # tesseract-ocr - http://tesseract-ocr.googlecode.com
 #
 import Image
 import os
 import sys
 import shutil
 from glob import glob
 import warnings
 warnings.filterwarnings("ignore", "tempnam")
 colors = ('0,255,255,255', '255,0,255,255', '255,255,0,255', '255,255,255,0')
 def readFile(fname):
  """
  Return the complete content of the file named fname.

  The file is opened with the default mode and is closed again even
  when reading raises an exception.
  """
  f = open(fname)
  try:
    return f.read()
  finally:
    f.close()
 def getColorChangeCount(image_name):
  """
  Return the largest per-column count of transitions from a
  non-background pixel back to a background pixel.

  The background value is the pixel at (0, 0). Each column is scanned
  from top to bottom. If the background is not 255 no scan happens and
  0 is returned.
  """
  img = Image.open(image_name)
  background = img.getpixel((0,0))
  if background != 255:
    return 0
  best = 0
  for x in range(0, img.size[0]):
    transitions = 0
    in_dark_run = False
    for y in range(0, img.size[1]):
      if img.getpixel((x,y)) == background:
        # leaving a run of non-background pixels counts as one change
        if in_dark_run:
          transitions += 1
        in_dark_run = False
      else:
        in_dark_run = True
    best = max(transitions, best)
  return best
 def getBestMask(filename):
  """
  Pick the color directory whose rendering of filename has the smallest
  non-zero color change count.

  For every entry of colors the image <workdir>/<color with '-'>/filename
  is measured with getColorChangeCount. Colors with a count of 0 are
  ignored. When several colors share a count, the one later in colors
  wins.

  Returns a dict with the keys 'output' (the color directory) and
  'input_file' (the measured image path).
  Raises ValueError when no color produced a non-zero count.
  """
  outputs = {}
  for c in colors:
    output = os.path.join(workdir, c.replace(',', '-'))
    input_file = os.path.join(output,filename)
    change_count = getColorChangeCount(input_file)
    if change_count:
      # keyed by count on purpose: an equal count from a later color
      # replaces the earlier entry
      outputs[change_count] = dict(
                                output=output, 
                                input_file=input_file, 
                              )
  if not outputs:
    # min() of an empty sequence would raise a ValueError without any
    # hint about what went wrong, so raise it with a message instead
    raise ValueError("no usable color mask found for %s" % filename)
  return outputs[min(outputs.keys())]
 # main
 # Usage: the first command line argument is the path of the subtitle
 # files without extension (a trailing "." is tolerated); the .sub/.rar,
 # .idx and .srt names are all derived from it.
 input_base = sys.argv[1]
 if input_base.endswith('.'):
  input_base = input_base[:-1]
 # absolute path, because the working directory changes below
 input_base = os.path.abspath(input_base)
 # scratch directory for all intermediate files; it becomes the cwd
 workdir = os.tempnam()
 os.mkdir(workdir)
 os.chdir(workdir)
 input_files = glob("%s*" % input_base)
 sub_file = "%s.sub" % input_base
 rar_file = "%s.rar" % input_base
 idx_file = "%s.idx" % input_base
 srt_file = "%s.srt" % input_base
 working_base = input_base
 # No .sub file but a .rar archive: copy the archive and the .idx into the
 # scratch directory, extract there and use the first *.sub that appears.
 if sub_file not in input_files and rar_file in input_files:
  working_base = os.path.join(workdir, os.path.basename(input_base))
  shutil.copy(rar_file, "%s.rar" % working_base)
  rar_file = "%s.rar" % working_base
  sub_file = "%s.sub" % working_base
  shutil.copy(idx_file, "%s.idx" % working_base)
  idx_file = "%s.idx" % working_base
  cmd="unrar x '%s' > /dev/null 2>&1" % rar_file
  os.system(cmd)
  # relative glob: looks in workdir, the current directory
  subs = glob("*.sub")
  if subs:
    os.rename(subs[0], sub_file)
  else:
    print "no sub file found"
    #cleanup
    shutil.rmtree(workdir)
    sys.exit(1)
 # Look for the "id: en" line in the .idx file; the text after "index: "
 # on that line is passed to vobsub2pgm as its -t argument. If several
 # lines match, the last one wins; if none matches, no -t is passed.
 sub_lang = "en"
 language = ''
 for l in readFile(idx_file).split('\n'):
  if l.startswith('id: %s' % sub_lang):
    language = "-t %s" % l.split('index: ')[-1].strip()
 # Run vobsub2pgm once per entry of colors (passed as -c), each run
 # writing its english* files into its own sub-directory of workdir.
 for c in colors:
  output = os.path.join(workdir, c.replace(',', '-'))
  if not os.path.exists(output):
    os.makedirs(output)
  cmd = "vobsub2pgm %s -c %s '%s' %s/english >/dev/null 2>&1" % (language, c, working_base, output)
  os.system(cmd)
 # The color directory is chosen from one sample image only
 # (english0010.pgm); all images of that directory are then processed.
 best_output = getBestMask("english0010.pgm")
 pgms = glob("%s/english*.pgm" % best_output['output'])
 for pgm in sorted(pgms):
  #input_pgm = getBestMask(os.path.basename(pgm))['input_file']
  input_pgm = pgm
  # convert the pgm to a tif (the same tif path is reused for every
  # image) and run tesseract on it with the pgm path as second argument
  subtitle_tif = os.path.join(workdir, 'subtitle.tif')
  cmd = "convert %s %s;tesseract %s %s >/dev/null 2>&1" %(input_pgm, subtitle_tif, subtitle_tif, pgm)
  os.system(cmd)
 #FIXME what about adding ispell here, interactive again
 # (candidate command: ispell <best_output['output']>/english*.txt; it was
 # only ever assigned to cmd and never executed)
 # build the .srt next to the input files from english.srtx
 cmd = "srttool -s -w < %s/english.srtx > '%s'" % (best_output['output'], srt_file)
 os.system(cmd)
 #correct some common mistakes of tesseract
 sed_script = os.path.join(workdir, 'fix_sed_script')
 f = open(sed_script, 'w')
 try:
  f.write('''s/Idn'/ldn'/g
 s/Id'v/ld'v/g
 s/ldn'\!/ldn't/g
 s/\\\/\\l/W/g
 s/V\\\l/W/g
 s/eII/ell/g
 s/></x/g
 ''')
 finally:
  # close (and thereby flush) the script before sed reads it; otherwise
  # the short script may still sit in the write buffer and sed would see
  # an empty file
  f.close()
 cmd = """sed -f %s -i '%s'""" % (sed_script, srt_file)
 os.system(cmd)
 #cleanup
 shutil.rmtree(workdir)
--- a/sample-prod.cfg
+++ b/sample-prod.cfg
@ -0,0 +1,84 @@
 [global]
 # This is where all of your settings go for your production environment.
 # You'll copy this file over to your production server and provide it
 # as a command-line option to your start script.
 # Settings that are the same for both development and production
 # (such as template engine, encodings, etc.) all go in 
 # oxdbarchive/config/app.cfg
 # DATABASE
 # pick the form for your database
 # sqlobject.dburi="postgres://username@hostname/databasename"
 # sqlobject.dburi="mysql://username:password@hostname:port/databasename"
 # sqlobject.dburi="sqlite:///file_name_and_path"
 # If you have sqlite, here's a simple default to get you started
 # in development
 sqlobject.dburi="sqlite://%(current_dir_uri)s/devdata.sqlite"
 # if you are using a database or table type without transactions
 # (MySQL default, for example), you should turn off transactions
 # by prepending notrans_ on the uri
 # sqlobject.dburi="notrans_mysql://username:password@hostname:port/databasename"
 # for Windows users, sqlite URIs look like:
 # sqlobject.dburi="sqlite:///drive_letter:/path/to/file"
 # SERVER
 server.environment="production"
 # Sets the number of threads the server uses
 # server.thread_pool = 1
 # if this is part of a larger site, you can set the path
 # to the TurboGears instance here
 # server.webpath=""
 # Set to True if you are deploying your App behind a proxy
 # e.g. Apache using mod_proxy
 # base_url_filter.on = False
 # Set to True if your proxy adds the x_forwarded_host header
 # base_url_filter.use_x_forwarded_host = True
 # If your proxy does not add the x_forwarded_host header, set
 # the following to the *public* host url.
 # (Note: This will be overridden by the use_x_forwarded_host option
 # if it is set to True and the proxy adds the header correctly.)
 # base_url_filter.base_url = "http://www.example.com"
 # Set to True if you'd like to abort execution if a controller gets an
 # unexpected parameter. False by default
 # tg.strict_parameters = False
 # LOGGING
 # Logging configuration generally follows the style of the standard
 # Python logging module configuration. Note that when specifying
 # log format messages, you need to use *() for formatting variables.
 # Deployment independent log configuration is in oxdbarchive/config/log.cfg
 [logging]
 [[handlers]]
 [[[access_out]]]
 # set the filename as the first argument below
 args="('server.log',)"
 class='FileHandler'
 level='INFO'
 formatter='message_only'
 [[loggers]]
 [[[oxdbarchive]]]
 level='ERROR'
 qualname='oxdbarchive'
 handlers=['error_out']
 [[[access]]]
 level='INFO'
 qualname='turbogears.access'
 handlers=['access_out']
 propagate=0
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,62 @@
 from setuptools import setup, find_packages
 from turbogears.finddata import find_package_data
 import os
 # Run oxdbarchive/release.py in this namespace; it is expected to define
 # `version` (used below) and, optionally, the metadata names that are
 # commented out in the setup() call.
 execfile(os.path.join("oxdbarchive", "release.py"))
 # Package definition for the oxdbarchive TurboGears application.
 setup(
    name="oxdbarchive",
    version=version,
    # uncomment the following lines if you fill them out in release.py
    #description=description,
    #author=author,
    #author_email=email,
    #url=url,
    #download_url=download_url,
    #license=license,
    install_requires = [
        "TurboGears >= 1.0.2.2",
    ],
    # the start script is installed alongside the package
    scripts = ["start-oxdbarchive.py"],
    zip_safe=False,
    packages=find_packages(),
    package_data = find_package_data(where='oxdbarchive',
                                     package='oxdbarchive'),
    keywords = [
        # Use keywords if you'll be adding your package to the
        # Python Cheeseshop
        # if this has widgets, uncomment the next line
        # 'turbogears.widgets',
        # if this has a tg-admin command, uncomment the next line
        # 'turbogears.command',
        # if this has identity providers, uncomment the next line
        # 'turbogears.identity.provider',
        # If this is a template plugin, uncomment the next line
        # 'python.templating.engines',
        # If this is a full application, uncomment the next line
        # 'turbogears.app',
    ],
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Framework :: TurboGears',
        # if this is an application that you'll distribute through
        # the Cheeseshop, uncomment the next line
        # 'Framework :: TurboGears :: Applications',
        # if this is a package that includes widgets that you'll distribute
        # through the Cheeseshop, uncomment the next line
        # 'Framework :: TurboGears :: Widgets',
    ],
    test_suite = 'nose.collector',
    )
--- a/start-oxdbarchive.py
+++ b/start-oxdbarchive.py
@ -0,0 +1,25 @@
 #!/usr/bin/env python
 import pkg_resources
 pkg_resources.require("TurboGears")
 from turbogears import update_config, start_server
 import cherrypy
 cherrypy.lowercase_api = True
 from os.path import *
 import sys
 # Pick the configuration file: a path given on the command line wins.
 # Without one, a setup.py next to this script selects dev.cfg; if that
 # is missing too, the script is probably installed and prod.cfg is used.
 if len(sys.argv) > 1:
    config_file = sys.argv[1]
 elif exists(join(dirname(__file__), "setup.py")):
    config_file = "dev.cfg"
 else:
    config_file = "prod.cfg"
 update_config(configfile=config_file, modulename="oxdbarchive.config")
 # the controllers are imported only after the configuration is loaded
 from oxdbarchive.controllers import Root
 start_server(Root())
--- a/test.cfg
+++ b/test.cfg
@ -0,0 +1,5 @@
 # You can place test-specific configuration options here (like test db uri, etc)
 #
 sqlobject.dburi = "sqlite:///:memory:"
		`@ -0,0 +1,2 @@`
							`db_module=oxdbarchive.model`
							`history_dir=$base/oxdbarchive/sqlobject-history`