migrate to pysubtitles, remove local outdated version

2008-05-31 14:16:59 +02:00 · 2008-05-31 14:16:59 +02:00 · 3f9407c944
commit 3f9407c944
parent 2e0b0605a2
4 changed files with 29 additions and 210 deletions
--- a/oxdbarchive/extract.py
+++ b/oxdbarchive/extract.py
@ -8,7 +8,7 @@ from os.path import abspath, join, dirname
 import shutil
 import time
-from subtitles import *
+from subtitles import time2ms
 def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
-  duration = time_str2msec(outpoint) - time_str2msec(inpoint)
+  duration = time2ms(outpoint) - time2ms(inpoint)
-  inpoint = time_str2msec(inpoint)
+  inpoint = time2ms(inpoint)
  extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
  cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
-  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
+  outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
  audiorate = "44100"
  if os.path.exists(movie_file):
@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96,
    outpoint = shift_time(-offset, outpoint)
    print " becomes ", inpoint
  print "extracting %s -> %s" % (inpoint, outpoint)
-  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
+  outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
  audiorate = "44100"
  if os.path.exists(movie_file):
@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint):
  if ext in ('sub', 'srt'):
    print "this is not a movie file, will not try to extract frames"
    return
-  inpoint = time_str2msec(inpoint)
+  inpoint = time2ms(inpoint)
  extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
  cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)
--- a/oxdbarchive/model.py
+++ b/oxdbarchive/model.py
@ -17,14 +17,13 @@ from glob import glob
 import shutil
 import socket
 import simplejson
 from scrapeit.utils import read_url
 import subtitles
 import cache
 import oxdb_import
 from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
 from subtitles import *
 from extract import *
 import midentify
@ -412,20 +411,20 @@ class ArchiveFile(SQLObject):
  def _startPoints(self):
    inpoints = []
    if self.srt:
-      d = srt2dict(self.srt)
+      d = subtitles.Subtitle(self.srt)
-      for s in d.values():
+      for s in d:
-        inpoints.append(s['start'])
+        inpoints.append(s['in'])
    elif self.length:
      minutes = int((float(self.length) / 1000) / 60)
      minute = 1000 * 60
      inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
    return inpoints
-  def _findSubtitleByStart(self, start):
+  def _findSubtitleByInPoint(self, inpoint):
    if self.srt:
-      d = srt2dict(self.srt)
+      d = subtitles.Subtitle(self.srt)
-      for s in d.values():
+      for s in d:
-        if s['start'] == start:
+        if s['in'] == inpoint:
          return s
    return None
@ -449,11 +448,11 @@ class ArchiveFile(SQLObject):
    height = height - height % 2
    inpoint = inpoint.replace('.', ':')
    if outpoint == -1:
-      s = self._findSubtitleByStart(inpoint)
+      s = self._findSubtitleByInPoint(inpoint)
      if s:
-        outpoint = s['stop']
+        outpoint = s['out']
      else:
-        outpoint = shift_time(5000, inpoint)
+        outpoint = subtitles.shiftTime(5000, inpoint)
    else:
      outpoint = outpoint.replace('.', ':')
    extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
@ -470,11 +469,11 @@ class ArchiveFile(SQLObject):
    height = height - height % 2
    inpoint = inpoint.replace('.', ':')
    if outpoint == -1:
-      s = self._findSubtitleByStart(inpoint)
+      s = self._findSubtitleByInPoint(inpoint)
      if s:
-        outpoint = s['stop']
+        outpoint = s['out']
      else:
-        outpoint = shift_time(5000, inpoint)
+        outpoint = subtitles.shiftTime(5000, inpoint)
    else:
      outpoint = outpoint.replace('.', ':')
    extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
@ -585,7 +584,7 @@ class ArchiveFile(SQLObject):
      if not subtitle.absolutePath.endswith('.srt'):
        debug("this is not a subtitle %s" % subtitle.absolutePath)
        return
-      movieFile.srt  = loadSrt(subtitle.absolutePath)
+      movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
  def _set_fps(self, fps):
    fps = int(fps * 10000)
--- a/oxdbarchive/subtitles.py
+++ b/oxdbarchive/subtitles.py
@ -1,181 +0,0 @@
 # -*- coding: utf-8 -*-
 # -*- Mode: Python; -*-
 # vi:si:et:sw=2:sts=2:ts=2
 import re
 import os
 from os.path import abspath, join, dirname
 import shutil
 import time
 import chardet
 # NOTE(review): not referenced by any function visible in this module;
 # presumably picked up by modules doing `from subtitles import *` --
 # confirm before removing.
 img_extension = 'jpg'
 def srt2txt(srt, encoding = "utf-8"):
  '''return only the text of the subtitles in an srt string

     entries are emitted in numeric order of their subtitle number, each
     followed by a blank line; the result is stripped of leading and
     trailing whitespace
  '''
  entries = srt2dict(srt, encoding)
  numbers = [int(number) for number in entries]
  numbers.sort()
  chunks = ["%s\n\n" % entries["%s" % number]['text'] for number in numbers]
  return ''.join(chunks).strip()
 def srt2dict(srt, encoding = "utf-8"):
  '''convert srt string into a dict in the form
     dict(num = dict(start, stop, text))

     keys are the subtitle numbers as normalized decimal strings
     ("1", "2", ...); start and stop are normalized to HH:MM:SS,mmm
     whatever separator characters the input used. blocks with fewer
     than three lines or without a "-->" timing line are skipped.

     encoding is accepted for interface compatibility but is not used.
     raises ValueError if the first line of a block is not an integer.
  '''
  # compiled once instead of once per timestamp; raw strings keep the
  # regex escapes out of the string-literal escape rules
  timestamp = re.compile(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})')
  normalized = r'\1:\2:\3,\4'
  subdict = {}
  for block in srt.replace('\r', '').strip().split('\n\n'):
    lines = block.strip().split('\n')
    if len(lines) < 3 or '-->' not in lines[1]:
      # not a usable entry: too short, or the second line is no timing line
      continue
    # keep only the timestamp itself, dropping anything that follows it
    # on the timing line after a space
    start, stop = [t.strip().split(' ')[0] for t in lines[1].split('-->')[:2]]
    number = "%s" % int(lines[0])
    subdict[number] = {
      'start': timestamp.sub(normalized, start),
      'stop': timestamp.sub(normalized, stop),
      'text': u'\n'.join(lines[2:]),
      }
  return subdict
 def dict2srt(subtitles, encoding = "utf-8"):
  '''serialize a dict in the form dict(num = dict(start, stop, text))
     to srt, returned as a string encoded with encoding

     entries are written in numeric order of their subtitle number,
     using \\r\\n line endings
  '''
  numbers = [int(number) for number in subtitles]
  numbers.sort()
  entries = []
  for number in numbers:
    entry = subtitles["%s" % number]
    entries.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
      number, entry['start'], entry['stop'], entry['text']))
  return ''.join(entries).strip().encode(encoding)
 def time_str2msec(time_string):
  '''convert a time string like 01:50:52,123 to milliseconds (int)

     any single character is accepted between the fields of a full
     HH?MM?SS?mmm timestamp (e.g. 01.50.52.123); the ",mmm" part is
     optional and counts as 0 when missing; fraction digits beyond the
     third are dropped.

     raises ValueError if the string can not be parsed.
  '''
  time_string = re.sub(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})', r'\1:\2:\3,\4', time_string)
  parts = time_string.split(',')
  if len(parts) > 1:
    # right-pad so that ",5" means 500 ms, exactly as the fraction 0.5 would
    msec = int(parts[-1][:3].ljust(3, '0'))
  else:
    msec = 0
  # plain integer arithmetic: adding the fraction to an epoch-sized float
  # and truncating afterwards can lose one millisecond (",123" -> 122)
  hours, minutes, seconds = [int(field) for field in parts[0].split(':')]
  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + msec
 def msec2time_str(msec):
  '''convert milliseconds to a time string in the form HH:MM:SS,mmm

     msec is truncated to an integer and expected to be non-negative.
     every field is zero padded, so 5 gives 00:00:00,005 (not ",5",
     which would read back as 500 milliseconds).
  '''
  seconds, ms = divmod(int(msec), 1000)
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, ms)
 def shift_time(offset, time_string):
  ''' return time_string moved by offset milliseconds
      time_string is expected to be in the form 01:50:52,123
  '''
  return msec2time_str(offset + time_str2msec(time_string))
 def shift_subtitles(offset, offset_num, subtitles):
  '''
    return a shifted copy of subtitles: start and stop of every entry are
    moved by offset milliseconds and every subtitle number is increased
    by offset_num

    subtitles is a dict in the form dict(num = dict(start, stop, text));
    neither the dict nor its entries are modified
  '''
  sdict = {}
  for key in subtitles:
    # copy the entry, the caller's subtitle must keep its original times
    entry = dict(subtitles[key])
    entry['start'] = shift_time(offset, entry['start'])
    entry['stop'] = shift_time(offset, entry['stop'])
    sdict["%s" % (int(key) + offset_num)] = entry
  return sdict
 def merge_subtitles(subtitles):
  '''
    joins several srt subtitles into one srt subtitle

    subtitles is indexed by sortable keys and processed in sorted key
    order; each value provides the srt text as 'txt' and a 'length' that
    is added to the time offset applied to the parts following it
  '''
  merged = {}
  elapsed = 0
  for name in sorted(subtitles):
    part = subtitles[name]
    entries = srt2dict(part['txt'])
    if elapsed:
      # continue times and numbering after what has been merged so far
      entries = shift_subtitles(elapsed, len(merged), entries)
    merged.update(entries)
    elapsed += part['length']
  return dict2srt(merged)
 def split_subtitle(subtitles, offset):
  '''
    split subtitles at offset, given in milliseconds

    subtitles is a dict in the form dict(num = dict(start, stop, text)).
    returns a tuple (one, two): one holds the entries that stop before
    offset, unchanged; two holds all other entries, moved back by offset
    and with their numbers reduced by the number of entries in one
  '''
  one = {}
  two = {}
  for k in sorted([int(k) for k in subtitles]):
    key = "%s" % k
    # compare as numbers, not as formatted strings
    if time_str2msec(subtitles[key]['stop']) < offset:
      one[key] = subtitles[key]
    else:
      two[key] = subtitles[key]
  # NOTE(review): an entry that starts before offset but stops after it
  # ends up in two with a start time before zero -- decide whether such
  # entries should be clamped
  two = shift_subtitles(-offset, -len(one), two)
  return one, two
 def detectEncoding(fp):
    '''guess the text encoding of the open file fp

    fp has to provide tell/seek/read and return bytes from read. a byte
    order mark at the start of the file decides the encoding; without
    one the whole content is handed to chardet, and 'latin-1' is
    returned if chardet has no guess. the position of fp is restored
    before returning.
    '''
    bomDict={ # bytepattern : name
             (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",
             (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
             (0xFE, 0xFF, None, None) : "utf_16_be",
             (0xFF, 0xFE, None, None) : "utf_16_le",
             (0xEF, 0xBB, 0xBF, None) : "utf_8",
            }
    # go to beginning of file and get the first 4 bytes
    oldFP = fp.tell()
    fp.seek(0)
    head = list(bytearray(fp.read(4)))
    # files shorter than 4 bytes: pad, so unpacking can not fail
    head += [None] * (4 - len(head))
    (byte1, byte2, byte3, byte4) = head
    # try bom detection using 4 bytes, 3 bytes, or 2 bytes
    bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
    if not bomDetection:
        bomDetection = bomDict.get((byte1, byte2, byte3, None))
        if not bomDetection:
            bomDetection = bomDict.get((byte1, byte2, None, None))
    ## if BOM detected, we're done :-)
    fp.seek(oldFP)
    if bomDetection:
        return bomDetection
    #more character detecting magick using http://chardet.feedparser.org/
    fp.seek(0)
    rawdata = fp.read()
    fp.seek(oldFP)
    # chardet reports None as encoding if it can not decide
    return chardet.detect(rawdata)['encoding'] or 'latin-1'
 def loadSrt(fname):
  '''read the subtitle file fname and return its content as unicode

     the encoding is guessed with detectEncoding; if the content can not
     be decoded with the guess it is decoded as latin-1. a leading byte
     order mark is removed from the result.
  '''
  # binary mode: the content is decoded below, the raw bytes are needed
  f = open(fname, 'rb')
  try:
    encoding = detectEncoding(f)
    data = f.read()
  finally:
    f.close()
  try:
    udata = unicode(data, encoding)
  except (UnicodeError, LookupError, TypeError):
    # wrong guess, unknown codec name or no guess at all (None);
    # latin-1 maps every byte to a character, so this can not fail
    udata = unicode(data, 'latin-1')
  if udata.startswith(u'\ufeff'):
    udata = udata[1:]
  return udata
--- a/oxdbarchive/timeline.py
+++ b/oxdbarchive/timeline.py
@ -6,8 +6,9 @@ import Image
 import math
 from StringIO import StringIO
 from subtitles import time2ms
 import oxdb_cache
 from subtitles import srt2dict, time_str2msec
 lineWidth = 600
@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1):
  mask = Image.new("RGBA", size, background)
  for subtitle in movie.overlay(query):
-    start = int(round(time_str2msec(subtitle.start) / 1000))
+    start = int(round(time2ms(subtitle.start) / 1000))
-    stop = int(round(time_str2msec(subtitle.stop) / 1000))
+    stop = int(round(time2ms(subtitle.stop) / 1000))
    if start < stop:
      currentLine = math.ceil(start / lineWidth)
      if currentLine <= l:
@ -93,8 +94,8 @@ def loadTimelineImageMap(movie):
  imageMap ='<map name="timelineImageMap">'
  for key in sorted([int(k) for k in s]):
    sub = s["%s" % key]
-    start = int(round(time_str2msec(sub['start']) / 1000))
+    start = int(round(time2ms(sub['start']) / 1000))
-    stop = int(round(time_str2msec(sub['stop']) / 1000))
+    stop = int(round(time2ms(sub['stop']) / 1000))
    if start < stop:
      currentLine = math.ceil(start / lineWidth)
      offset = int(currentLine * rowHeight + 4)