From 3f9407c94452694788ff8a6739a5a6302fb3271d Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sat, 31 May 2008 14:16:59 +0200
Subject: [PATCH] migrate to pysubtitles, remove local outdated version

---
 oxdbarchive/extract.py   |  12 +--
 oxdbarchive/model.py     |  35 ++++----
 oxdbarchive/subtitles.py | 181 ---------------------------------------
 oxdbarchive/timeline.py  |  11 +--
 4 files changed, 29 insertions(+), 210 deletions(-)
 delete mode 100644 oxdbarchive/subtitles.py

diff --git a/oxdbarchive/extract.py b/oxdbarchive/extract.py
index 1c524b1..5c2cf82 100644
--- a/oxdbarchive/extract.py
+++ b/oxdbarchive/extract.py
@@ -8,7 +8,7 @@ from os.path import abspath, join, dirname
 import shutil
 import time
 
-from subtitles import *
+from subtitles import time2ms, shiftTime as shift_time  # shift_time() is still called by extract_flash/extract_ogg
 
 
 def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
@@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh
     print " becomes ", inpoint
   
   print "extracting %s -> %s" % (inpoint, outpoint)
-  duration = time_str2msec(outpoint) - time_str2msec(inpoint)
-  inpoint = time_str2msec(inpoint)
+  duration = time2ms(outpoint) - time2ms(inpoint)
+  inpoint = time2ms(inpoint)
   extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
 
   cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
@@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9
     outpoint = shift_time(-offset, outpoint)
     print " becomes ", inpoint
   print "extracting %s -> %s" % (inpoint, outpoint)
-  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
+  outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
 
   audiorate = "44100"
   if os.path.exists(movie_file):
@@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96,
     outpoint = shift_time(-offset, outpoint)
     print " becomes ", inpoint
   print "extracting %s -> %s" % (inpoint, outpoint)
-  outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
+  outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
 
   audiorate = "44100"
   if os.path.exists(movie_file):
@@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint):
   if ext in ('sub', 'srt'):
     print "this is not a movie file, will not try to extract frames"
     return
-  inpoint = time_str2msec(inpoint)
+  inpoint = time2ms(inpoint)
   extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
 
   cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)
diff --git a/oxdbarchive/model.py b/oxdbarchive/model.py
index 46a7977..afce4ba 100644
--- a/oxdbarchive/model.py
+++ b/oxdbarchive/model.py
@@ -17,14 +17,13 @@ from glob import glob
 import shutil
 import socket
 
-
 import simplejson
 from scrapeit.utils import read_url
+import subtitles
 
 import cache
 import oxdb_import
 from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
-from subtitles import *
 from extract import *
 import midentify
 
@@ -412,20 +411,20 @@ class ArchiveFile(SQLObject):
   def _startPoints(self):
     inpoints = []
     if self.srt:
-      d = srt2dict(self.srt)
-      for s in d.values():
-        inpoints.append(s['start'])
+      d = subtitles.Subtitle(self.srt)
+      for s in d:
+        inpoints.append(s['in'])
     elif self.length:
       minutes = int((float(self.length) / 1000) / 60)
       minute = 1000 * 60
       inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
     return inpoints
-  
-  def _findSubtitleByStart(self, start):
+
+  def _findSubtitleByInPoint(self, inpoint):
     if self.srt:
-      d = srt2dict(self.srt)
-      for s in d.values():
-        if s['start'] == start:
+      d = subtitles.Subtitle(self.srt)
+      for s in d:
+        if s['in'] == inpoint:
           return s
     return None
     
@@ -449,11 +448,11 @@ class ArchiveFile(SQLObject):
     height = height - height % 2
     inpoint = inpoint.replace('.', ':')
     if outpoint == -1:
-      s = self._findSubtitleByStart(inpoint)
+      s = self._findSubtitleByInPoint(inpoint)
       if s:
-        outpoint = s['stop']
+        outpoint = s['out']
       else:
-        outpoint = shift_time(5000, inpoint)
+        outpoint = subtitles.shiftTime(5000, inpoint)
     else:
       outpoint = outpoint.replace('.', ':')
     extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
@@ -470,11 +469,11 @@ class ArchiveFile(SQLObject):
     height = height - height % 2
     inpoint = inpoint.replace('.', ':')
     if outpoint == -1:
-      s = self._findSubtitleByStart(inpoint)
+      s = self._findSubtitleByInPoint(inpoint)
       if s:
-        outpoint = s['stop']
+        outpoint = s['out']
       else:
-        outpoint = shift_time(5000, inpoint)
+        outpoint = subtitles.shiftTime(5000, inpoint)
     else:
       outpoint = outpoint.replace('.', ':')
     extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
@@ -585,8 +584,8 @@ class ArchiveFile(SQLObject):
       if not subtitle.absolutePath.endswith('.srt'):
         debug("this is not a subtitle %s" % subtitle.absolutePath)
         return
-      movieFile.srt  = loadSrt(subtitle.absolutePath)
-  
+      movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
+
   def _set_fps(self, fps):
     fps = int(fps * 10000)
     self._SO_set_fps(fps)
diff --git a/oxdbarchive/subtitles.py b/oxdbarchive/subtitles.py
deleted file mode 100644
index 4ee6c9a..0000000
--- a/oxdbarchive/subtitles.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# -*- coding: utf-8 -*-
-# -*- Mode: Python; -*-
-# vi:si:et:sw=2:sts=2:ts=2
-
-import re
-import os
-from os.path import abspath, join, dirname
-import shutil
-import time
-
-import chardet
-
-img_extension = 'jpg'
-
-def srt2txt(srt, encoding = "utf-8"):
-  subtitles = srt2dict(srt, encoding)
-  txt = ''
-  for k in sorted([int(k) for k in subtitles]):
-    txt += "%s\n\n" % subtitles["%s" % k]['text']
-  return txt.strip()
-  
-def srt2dict(srt, encoding = "utf-8"):
-  '''convert srt string into a dict in the form
-     dict(num = dict(start, stop, text))
-  '''
-  subdict = {}
-  srt = srt.replace('\r', '').strip()
-  subtitles = srt.strip().split('\n\n')
-  for subtitle in subtitles:
-    if subtitle.strip():
-      subtitle = subtitle.strip().split('\n')
-      if len(subtitle) > 2:
-        start_stop = subtitle[1].split('-->')
-        start_stop[0] = start_stop[0].strip().split(' ')[0]
-        start_stop[1] = start_stop[1].strip().split(' ')[0]
-        start_stop[0] = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', start_stop[0])
-        start_stop[1] = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', start_stop[1])
-        subtitle[0] ="%s" % int(subtitle[0])
-        subdict[subtitle[0]] = {
-          'start': start_stop[0],
-          'stop': start_stop[1],
-          'text': u'\n'.join(subtitle[2:]),
-          }
-  return subdict
-
-def dict2srt(subtitles, encoding = "utf-8"):
-  '''convert dict in the form dict(num = dict(start, stop, text))
-     into an srt file
-  '''
-  srt = ''
-  for k in sorted([int(k) for k in subtitles]):
-    k = "%s" % k
-    srt += "%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
-      k, 
-      subtitles[k]['start'], 
-      subtitles[k]['stop'], 
-      subtitles[k]['text'])
-  srt = srt.strip()
-  return srt.encode(encoding)
-  
-def time_str2msec(time_string):
-  from datetime import datetime, timedelta
-  import time
-  time_string = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', time_string)
-  if len(time_string.split(',')) > 1:
-    msec = float("0." + time_string.split(',')[-1])
-  else:
-    msec = 0.0
-  time_string = time_string.split(',')[0]
-  time_string = "2007 " + time_string
-  offset = time.mktime(time.strptime(time_string, "%Y %H:%M:%S")) + msec
-  base = time.mktime(time.strptime("2007 00:00:00", "%Y %H:%M:%S"))
-  return int((offset - base) * 1000)
-
-def msec2time_str(msec):
-  import time
-  msec_string = "%s" % msec
-  ms = ",%s" % msec_string[-3:]
-  sec = float(msec) / 1000
-  return time.strftime("%H:%M:%S", time.gmtime(sec)) + ms
-
-def shift_time(offset, time_string):
-  ''' return time shifted by offset milliseconds
-      format of time is expedted to be 01:50:52,123
-  '''
-  new_time = time_str2msec(time_string) + offset
-  return msec2time_str(new_time)
-  
-def shift_subtitles(offset, offset_num, subtitles):
-  '''
-    shifts a subtitle by offset, where offsest is a tuple (time, position)
-  '''
-  sdict = {}
-  for k in sorted([int(k) for k in subtitles]):
-    ko = "%s" % (k + offset_num)
-    sdict[ko] = subtitles["%s" % k]
-    sdict[ko]['start'] = shift_time(offset, sdict[ko]['start'])
-    sdict[ko]['stop'] = shift_time(offset, sdict[ko]['stop'])
-  return sdict
-  
-def merge_subtitles(subtitles):
-  '''
-    converts a list of subtitles / dict(txt, length)
-    into one srt subtitle
-  '''
-  subs = {}
-  offset = 0
-  for k in sorted(subtitles):
-    sdict = srt2dict(subtitles[k]['txt'])
-    if offset:
-      sdict = shift_subtitles(offset, len(subs), sdict)
-    for key in sdict:
-      subs[key] = sdict[key]
-    offset += subtitles[k]['length']
-  return dict2srt(subs)
-
-def split_subtitle(subtitles, offset):
-  '''
-    split subtitles at offset
-  '''
-  offset_time = time.strftime("%H:%M:%S", offset)
-  one = {}
-  two = {}
-  for k in sorted([int(k) for k in subtitles]):
-    if subtitles['stop'] < offset_time:
-      one[k] = subtitle[k]
-    else:
-      two[k] = subtitle[k]
-  two = shift_subtitles(-offset, -len(two), two)
-
-def detectEncoding(fp):
-    bomDict={ # bytepattern : name              
-             (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",        
-             (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
-             (0xFE, 0xFF, None, None) : "utf_16_be", 
-             (0xFF, 0xFE, None, None) : "utf_16_le", 
-             (0xEF, 0xBB, 0xBF, None) : "utf_8",
-            }
-
-    # go to beginning of file and get the first 4 bytes
-    oldFP = fp.tell()
-    fp.seek(0)
-    (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4)))
-
-    # try bom detection using 4 bytes, 3 bytes, or 2 bytes
-    bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
-    if not bomDetection :
-        bomDetection = bomDict.get((byte1, byte2, byte3, None))
-        if not bomDetection :
-            bomDetection = bomDict.get((byte1, byte2, None, None))
-
-    ## if BOM detected, we're done :-)
-    fp.seek(oldFP)
-    if bomDetection :
-        return bomDetection
-
-    encoding = 'latin-1'
-    #more character detecting magick using http://chardet.feedparser.org/
-    fp.seek(0)
-    rawdata = fp.read()
-    encoding = chardet.detect(rawdata)['encoding']
-    fp.seek(oldFP)
-    return encoding
-
-def loadSrt(fname):
-  f = open(fname)
-  encoding = detectEncoding(f)
-  data = f.read()
-  f.close()
-  try:
-    udata = unicode(data, encoding)
-  except:
-    try:
-      udata = unicode(data, 'latin-1')
-    except:
-      print "failed to detect encoding, giving up"
-      udate = u''
-  if udata.startswith(u'\ufeff'): 
-    udata = udata[1:]
-  return udata
-  
diff --git a/oxdbarchive/timeline.py b/oxdbarchive/timeline.py
index a35d60e..ce97076 100644
--- a/oxdbarchive/timeline.py
+++ b/oxdbarchive/timeline.py
@@ -6,8 +6,9 @@ import Image
 import math
 from StringIO import StringIO
 
+from subtitles import time2ms
+
 import oxdb_cache
-from subtitles import srt2dict, time_str2msec
 
 
 lineWidth = 600
@@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1):
   mask = Image.new("RGBA", size, background)
   
   for subtitle in movie.overlay(query):
-    start = int(round(time_str2msec(subtitle.start) / 1000))
-    stop = int(round(time_str2msec(subtitle.stop) / 1000))
+    start = int(round(time2ms(subtitle.start) / 1000))
+    stop = int(round(time2ms(subtitle.stop) / 1000))
     if start < stop:
       currentLine = math.ceil(start / lineWidth)
       if currentLine <= l:
@@ -93,8 +94,8 @@ def loadTimelineImageMap(movie):
   imageMap ='<map name="timelineImageMap">'
   for key in sorted([int(k) for k in s]):
     sub = s["%s" % key]
-    start = int(round(time_str2msec(sub['start']) / 1000))
-    stop = int(round(time_str2msec(sub['stop']) / 1000))
+    start = int(round(time2ms(sub['start']) / 1000))
+    stop = int(round(time2ms(sub['stop']) / 1000))
     if start < stop:
       currentLine = math.ceil(start / lineWidth)
       offset = int(currentLine * rowHeight + 4)