From 3f9407c94452694788ff8a6739a5a6302fb3271d Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 31 May 2008 14:16:59 +0200 Subject: [PATCH] migrate to pysubtitles, remove local outdated version --- oxdbarchive/extract.py | 12 +-- oxdbarchive/model.py | 35 ++++---- oxdbarchive/subtitles.py | 181 --------------------------------------- oxdbarchive/timeline.py | 11 +-- 4 files changed, 29 insertions(+), 210 deletions(-) delete mode 100644 oxdbarchive/subtitles.py diff --git a/oxdbarchive/extract.py b/oxdbarchive/extract.py index 1c524b1..5c2cf82 100644 --- a/oxdbarchive/extract.py +++ b/oxdbarchive/extract.py @@ -8,7 +8,7 @@ from os.path import abspath, join, dirname import shutil import time -from subtitles import * +from subtitles import time2ms def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0): @@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh print " becomes ", inpoint print "extracting %s -> %s" % (inpoint, outpoint) - duration = time_str2msec(outpoint) - time_str2msec(inpoint) - inpoint = time_str2msec(inpoint) + duration = time2ms(outpoint) - time2ms(inpoint) + inpoint = time2ms(inpoint) extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py")) cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration) @@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9 outpoint = shift_time(-offset, outpoint) print " becomes ", inpoint print "extracting %s -> %s" % (inpoint, outpoint) - outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1 + outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1 audiorate = "44100" if os.path.exists(movie_file): @@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96, outpoint = shift_time(-offset, outpoint) print " becomes ", inpoint print "extracting %s -> %s" % (inpoint, outpoint) - outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1 + outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1 audiorate = "44100" if os.path.exists(movie_file): @@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint): if ext in ('sub', 'srt'): print "this is not a movie file, will not try to extract frames" return - inpoint = time_str2msec(inpoint) + inpoint = time2ms(inpoint) extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py")) cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint) diff --git a/oxdbarchive/model.py b/oxdbarchive/model.py index 46a7977..afce4ba 100644 --- a/oxdbarchive/model.py +++ b/oxdbarchive/model.py @@ -17,14 +17,13 @@ from glob import glob import shutil import socket - import simplejson from scrapeit.utils import read_url +import subtitles import cache import oxdb_import from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir -from subtitles import * from extract import * import midentify @@ -412,20 +411,20 @@ class ArchiveFile(SQLObject): def _startPoints(self): inpoints = [] if self.srt: - d = srt2dict(self.srt) - for s in d.values(): - inpoints.append(s['start']) + d = subtitles.Subtitle(self.srt) + for s in d: + inpoints.append(s['in']) elif self.length: minutes = int((float(self.length) / 1000) / 60) minute = 1000 * 60 inpoints = [msec2time_str(m*minute) for m in range(0, minutes)] return inpoints - - def _findSubtitleByStart(self, start): + + def _findSubtitleByInPoint(self, inpoint): if self.srt: - d = srt2dict(self.srt) - for s in d.values(): - if s['start'] == start: + d = subtitles.Subtitle(self.srt) + for s in d: + if s['in'] == inpoint: return s return None @@ -449,11 +448,11 @@ class ArchiveFile(SQLObject): height = height - height % 2 inpoint = inpoint.replace('.', ':') if outpoint == -1: - s = self._findSubtitleByStart(inpoint) + s = self._findSubtitleByInPoint(inpoint) if s: - outpoint = s['stop'] + outpoint = s['out'] else: - outpoint = shift_time(5000, inpoint) + outpoint = subtitles.shiftTime(5000, inpoint) else: outpoint = outpoint.replace('.', ':') extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0) @@ -470,11 +469,11 @@ class ArchiveFile(SQLObject): height = height - height % 2 inpoint = inpoint.replace('.', ':') if outpoint == -1: - s = self._findSubtitleByStart(inpoint) + s = self._findSubtitleByInPoint(inpoint) if s: - outpoint = s['stop'] + outpoint = s['out'] else: - outpoint = shift_time(5000, inpoint) + outpoint = subtitles.shiftTime(5000, inpoint) else: outpoint = outpoint.replace('.', ':') extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0) @@ -585,8 +584,8 @@ class ArchiveFile(SQLObject): if not subtitle.absolutePath.endswith('.srt'): debug("this is not a subtitle %s" % subtitle.absolutePath) return - movieFile.srt = loadSrt(subtitle.absolutePath) - + movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt() + def _set_fps(self, fps): fps = int(fps * 10000) self._SO_set_fps(fps) diff --git a/oxdbarchive/subtitles.py b/oxdbarchive/subtitles.py deleted file mode 100644 index 4ee6c9a..0000000 --- a/oxdbarchive/subtitles.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- -# -*- Mode: Python; -*- -# vi:si:et:sw=2:sts=2:ts=2 - -import re -import os -from os.path import abspath, join, dirname -import shutil -import time - -import chardet - -img_extension = 'jpg' - -def srt2txt(srt, encoding = "utf-8"): - subtitles = srt2dict(srt, encoding) - txt = '' - for k in sorted([int(k) for k in subtitles]): - txt += "%s\n\n" % subtitles["%s" % k]['text'] - return txt.strip() - -def srt2dict(srt, encoding = "utf-8"): - '''convert srt string into a dict in the form - dict(num = dict(start, stop, text)) - ''' - subdict = {} - srt = srt.replace('\r', '').strip() - subtitles = srt.strip().split('\n\n') - for subtitle in subtitles: - if subtitle.strip(): - subtitle = subtitle.strip().split('\n') - if len(subtitle) > 2: - start_stop = subtitle[1].split('-->') - start_stop[0] = start_stop[0].strip().split(' ')[0] - start_stop[1] = start_stop[1].strip().split(' ')[0] - start_stop[0] = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', start_stop[0]) - start_stop[1] = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', start_stop[1]) - subtitle[0] ="%s" % int(subtitle[0]) - subdict[subtitle[0]] = { - 'start': start_stop[0], - 'stop': start_stop[1], - 'text': u'\n'.join(subtitle[2:]), - } - return subdict - -def dict2srt(subtitles, encoding = "utf-8"): - '''convert dict in the form dict(num = dict(start, stop, text)) - into an srt file - ''' - srt = '' - for k in sorted([int(k) for k in subtitles]): - k = "%s" % k - srt += "%s\r\n%s --> %s\r\n%s\r\n\r\n" % ( - k, - subtitles[k]['start'], - subtitles[k]['stop'], - subtitles[k]['text']) - srt = srt.strip() - return srt.encode(encoding) - -def time_str2msec(time_string): - from datetime import datetime, timedelta - import time - time_string = re.sub('(\d{2}).(\d{2}).(\d{2}).(\d{3})', '\\1:\\2:\\3,\\4', time_string) - if len(time_string.split(',')) > 1: - msec = float("0." + time_string.split(',')[-1]) - else: - msec = 0.0 - time_string = time_string.split(',')[0] - time_string = "2007 " + time_string - offset = time.mktime(time.strptime(time_string, "%Y %H:%M:%S")) + msec - base = time.mktime(time.strptime("2007 00:00:00", "%Y %H:%M:%S")) - return int((offset - base) * 1000) - -def msec2time_str(msec): - import time - msec_string = "%s" % msec - ms = ",%s" % msec_string[-3:] - sec = float(msec) / 1000 - return time.strftime("%H:%M:%S", time.gmtime(sec)) + ms - -def shift_time(offset, time_string): - ''' return time shifted by offset milliseconds - format of time is expedted to be 01:50:52,123 - ''' - new_time = time_str2msec(time_string) + offset - return msec2time_str(new_time) - -def shift_subtitles(offset, offset_num, subtitles): - ''' - shifts a subtitle by offset, where offsest is a tuple (time, position) - ''' - sdict = {} - for k in sorted([int(k) for k in subtitles]): - ko = "%s" % (k + offset_num) - sdict[ko] = subtitles["%s" % k] - sdict[ko]['start'] = shift_time(offset, sdict[ko]['start']) - sdict[ko]['stop'] = shift_time(offset, sdict[ko]['stop']) - return sdict - -def merge_subtitles(subtitles): - ''' - converts a list of subtitles / dict(txt, length) - into one srt subtitle - ''' - subs = {} - offset = 0 - for k in sorted(subtitles): - sdict = srt2dict(subtitles[k]['txt']) - if offset: - sdict = shift_subtitles(offset, len(subs), sdict) - for key in sdict: - subs[key] = sdict[key] - offset += subtitles[k]['length'] - return dict2srt(subs) - -def split_subtitle(subtitles, offset): - ''' - split subtitles at offset - ''' - offset_time = time.strftime("%H:%M:%S", offset) - one = {} - two = {} - for k in sorted([int(k) for k in subtitles]): - if subtitles['stop'] < offset_time: - one[k] = subtitle[k] - else: - two[k] = subtitle[k] - two = shift_subtitles(-offset, -len(two), two) - -def detectEncoding(fp): - bomDict={ # bytepattern : name - (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be", - (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le", - (0xFE, 0xFF, None, None) : "utf_16_be", - (0xFF, 0xFE, None, None) : "utf_16_le", - (0xEF, 0xBB, 0xBF, None) : "utf_8", - } - - # go to beginning of file and get the first 4 bytes - oldFP = fp.tell() - fp.seek(0) - (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4))) - - # try bom detection using 4 bytes, 3 bytes, or 2 bytes - bomDetection = bomDict.get((byte1, byte2, byte3, byte4)) - if not bomDetection : - bomDetection = bomDict.get((byte1, byte2, byte3, None)) - if not bomDetection : - bomDetection = bomDict.get((byte1, byte2, None, None)) - - ## if BOM detected, we're done :-) - fp.seek(oldFP) - if bomDetection : - return bomDetection - - encoding = 'latin-1' - #more character detecting magick using http://chardet.feedparser.org/ - fp.seek(0) - rawdata = fp.read() - encoding = chardet.detect(rawdata)['encoding'] - fp.seek(oldFP) - return encoding - -def loadSrt(fname): - f = open(fname) - encoding = detectEncoding(f) - data = f.read() - f.close() - try: - udata = unicode(data, encoding) - except: - try: - udata = unicode(data, 'latin-1') - except: - print "failed to detect encoding, giving up" - udate = u'' - if udata.startswith(u'\ufeff'): - udata = udata[1:] - return udata - diff --git a/oxdbarchive/timeline.py b/oxdbarchive/timeline.py index a35d60e..ce97076 100644 --- a/oxdbarchive/timeline.py +++ b/oxdbarchive/timeline.py @@ -6,8 +6,9 @@ import Image import math from StringIO import StringIO +from subtitles import time2ms + import oxdb_cache -from subtitles import srt2dict, time_str2msec lineWidth = 600 @@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1): mask = Image.new("RGBA", size, background) for subtitle in movie.overlay(query): - start = int(round(time_str2msec(subtitle.start) / 1000)) - stop = int(round(time_str2msec(subtitle.stop) / 1000)) + start = int(round(time2ms(subtitle.start) / 1000)) + stop = int(round(time2ms(subtitle.stop) / 1000)) if start < stop: currentLine = math.ceil(start / lineWidth) if currentLine <= l: @@ -93,8 +94,8 @@ def loadTimelineImageMap(movie): imageMap ='' for key in sorted([int(k) for k in s]): sub = s["%s" % key] - start = int(round(time_str2msec(sub['start']) / 1000)) - stop = int(round(time_str2msec(sub['stop']) / 1000)) + start = int(round(time2ms(sub['start']) / 1000)) + stop = int(round(time2ms(sub['stop']) / 1000)) if start < stop: currentLine = math.ceil(start / lineWidth) offset = int(currentLine * rowHeight + 4)