migrate to pysubtitles, remove local outdated version

This commit is contained in:
j 2008-05-31 14:16:59 +02:00
parent 2e0b0605a2
commit 3f9407c944
4 changed files with 29 additions and 210 deletions

View file

@ -8,7 +8,7 @@ from os.path import abspath, join, dirname
import shutil import shutil
import time import time
from subtitles import * from subtitles import time2ms
def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0): def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh
print " becomes ", inpoint print " becomes ", inpoint
print "extracting %s -> %s" % (inpoint, outpoint) print "extracting %s -> %s" % (inpoint, outpoint)
duration = time_str2msec(outpoint) - time_str2msec(inpoint) duration = time2ms(outpoint) - time2ms(inpoint)
inpoint = time_str2msec(inpoint) inpoint = time2ms(inpoint)
extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py")) extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration) cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9
outpoint = shift_time(-offset, outpoint) outpoint = shift_time(-offset, outpoint)
print " becomes ", inpoint print " becomes ", inpoint
print "extracting %s -> %s" % (inpoint, outpoint) print "extracting %s -> %s" % (inpoint, outpoint)
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1 outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
audiorate = "44100" audiorate = "44100"
if os.path.exists(movie_file): if os.path.exists(movie_file):
@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96,
outpoint = shift_time(-offset, outpoint) outpoint = shift_time(-offset, outpoint)
print " becomes ", inpoint print " becomes ", inpoint
print "extracting %s -> %s" % (inpoint, outpoint) print "extracting %s -> %s" % (inpoint, outpoint)
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1 outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
audiorate = "44100" audiorate = "44100"
if os.path.exists(movie_file): if os.path.exists(movie_file):
@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint):
if ext in ('sub', 'srt'): if ext in ('sub', 'srt'):
print "this is not a movie file, will not try to extract frames" print "this is not a movie file, will not try to extract frames"
return return
inpoint = time_str2msec(inpoint) inpoint = time2ms(inpoint)
extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py")) extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint) cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)

View file

@ -17,14 +17,13 @@ from glob import glob
import shutil import shutil
import socket import socket
import simplejson import simplejson
from scrapeit.utils import read_url from scrapeit.utils import read_url
import subtitles
import cache import cache
import oxdb_import import oxdb_import
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
from subtitles import *
from extract import * from extract import *
import midentify import midentify
@ -412,20 +411,20 @@ class ArchiveFile(SQLObject):
def _startPoints(self): def _startPoints(self):
inpoints = [] inpoints = []
if self.srt: if self.srt:
d = srt2dict(self.srt) d = subtitles.Subtitle(self.srt)
for s in d.values(): for s in d:
inpoints.append(s['start']) inpoints.append(s['in'])
elif self.length: elif self.length:
minutes = int((float(self.length) / 1000) / 60) minutes = int((float(self.length) / 1000) / 60)
minute = 1000 * 60 minute = 1000 * 60
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)] inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
return inpoints return inpoints
def _findSubtitleByStart(self, start): def _findSubtitleByInPoint(self, inpoint):
if self.srt: if self.srt:
d = srt2dict(self.srt) d = subtitles.Subtitle(self.srt)
for s in d.values(): for s in d:
if s['start'] == start: if s['in'] == inpoint:
return s return s
return None return None
@ -449,11 +448,11 @@ class ArchiveFile(SQLObject):
height = height - height % 2 height = height - height % 2
inpoint = inpoint.replace('.', ':') inpoint = inpoint.replace('.', ':')
if outpoint == -1: if outpoint == -1:
s = self._findSubtitleByStart(inpoint) s = self._findSubtitleByInPoint(inpoint)
if s: if s:
outpoint = s['stop'] outpoint = s['out']
else: else:
outpoint = shift_time(5000, inpoint) outpoint = subtitles.shiftTime(5000, inpoint)
else: else:
outpoint = outpoint.replace('.', ':') outpoint = outpoint.replace('.', ':')
extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0) extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
@ -470,11 +469,11 @@ class ArchiveFile(SQLObject):
height = height - height % 2 height = height - height % 2
inpoint = inpoint.replace('.', ':') inpoint = inpoint.replace('.', ':')
if outpoint == -1: if outpoint == -1:
s = self._findSubtitleByStart(inpoint) s = self._findSubtitleByInPoint(inpoint)
if s: if s:
outpoint = s['stop'] outpoint = s['out']
else: else:
outpoint = shift_time(5000, inpoint) outpoint = subtitles.shiftTime(5000, inpoint)
else: else:
outpoint = outpoint.replace('.', ':') outpoint = outpoint.replace('.', ':')
extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0) extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
@ -585,7 +584,7 @@ class ArchiveFile(SQLObject):
if not subtitle.absolutePath.endswith('.srt'): if not subtitle.absolutePath.endswith('.srt'):
debug("this is not a subtitle %s" % subtitle.absolutePath) debug("this is not a subtitle %s" % subtitle.absolutePath)
return return
movieFile.srt = loadSrt(subtitle.absolutePath) movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
def _set_fps(self, fps): def _set_fps(self, fps):
fps = int(fps * 10000) fps = int(fps * 10000)

View file

@ -1,181 +0,0 @@
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import os
from os.path import abspath, join, dirname
import shutil
import time
import chardet
img_extension = 'jpg'
def srt2txt(srt, encoding = "utf-8"):
    '''return only the text of every cue found in an srt string

    cues are emitted in numeric index order, each followed by a blank
    line; leading/trailing whitespace of the whole result is stripped
    '''
    cues = srt2dict(srt, encoding)
    ordered = sorted([int(num) for num in cues])
    chunks = ["%s\n\n" % cues["%s" % num]['text'] for num in ordered]
    return ''.join(chunks).strip()
def srt2dict(srt, encoding = "utf-8"):
    '''convert srt string into a dict in the form
    dict(num = dict(start, stop, text))

    num is the cue index as a decimal string ("1", "2", ...).
    start/stop are normalised to HH:MM:SS,mmm whatever separator
    characters the file used; anything after the first space of a
    timecode (e.g. position hints) is dropped.
    Blocks with fewer than three lines, a non-numeric index or a timing
    line without "-->" are skipped, so a single damaged cue does not
    abort the whole parse.
    encoding is accepted for interface compatibility and is not used.
    '''
    # the unescaped dots are deliberate: any separator character is accepted
    timecode = re.compile(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})')
    subdict = {}
    for block in srt.replace('\r', '').strip().split('\n\n'):
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue
        try:
            num = "%s" % int(lines[0])
        except ValueError:
            # index line is not a number
            continue
        start_stop = lines[1].split('-->')
        if len(start_stop) < 2:
            # timing line has no "-->" separator
            continue
        start, stop = [
            timecode.sub(r'\1:\2:\3,\4', t.strip().split(' ')[0])
            for t in start_stop[:2]
        ]
        subdict[num] = {
            'start': start,
            'stop': stop,
            'text': u'\n'.join(lines[2:]),
        }
    return subdict
def dict2srt(subtitles, encoding = "utf-8"):
    '''serialise a dict in the form dict(num = dict(start, stop, text))
    into srt data

    cues are written in numeric index order with \\r\\n line endings;
    the result is stripped and encoded with the given encoding
    '''
    entries = []
    for num in sorted([int(k) for k in subtitles]):
        key = "%s" % num
        cue = subtitles[key]
        entries.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
            key,
            cue['start'],
            cue['stop'],
            cue['text']))
    return ''.join(entries).strip().encode(encoding)
def time_str2msec(time_string):
    '''convert a timecode like 01:50:52,123 into milliseconds

    HH.MM.SS.mmm with any separator characters is normalised first.
    The part after the comma is read as a decimal fraction of a second
    (",5" is 500 ms) and truncated to whole milliseconds; it may be
    missing entirely.
    Raises ValueError if the string is not a valid H:M:S timecode.
    '''
    time_string = re.sub(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})', r'\1:\2:\3,\4', time_string)
    parts = time_string.split(',')
    fraction = ''
    if len(parts) > 1:
        fraction = parts[-1]
    hours, minutes, seconds = [int(p) for p in parts[0].split(':')]
    # pure integer arithmetic: going through float epoch seconds (mktime)
    # loses precision and truncates many values one millisecond low, and
    # also ties the result to the local timezone
    msec = int((fraction + '000')[:3])
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + msec
def msec2time_str(msec):
    '''format a number of milliseconds as HH:MM:SS,mmm

    every field is zero padded, so 5 becomes 00:00:00,005 and the result
    reads back as the same value. Fractions of a millisecond are dropped.
    Hours are not wrapped at 24; negative values get a leading "-".
    '''
    msec = int(msec)
    sign = ''
    if msec < 0:
        sign = '-'
        msec = -msec
    seconds, ms = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return "%s%02d:%02d:%02d,%03d" % (sign, hours, minutes, seconds, ms)
def shift_time(offset, time_string):
    ''' return time shifted by offset milliseconds
    format of time is expected to be 01:50:52,123
    '''
    shifted_msec = time_str2msec(time_string) + offset
    return msec2time_str(shifted_msec)
def shift_subtitles(offset, offset_num, subtitles):
    '''
    return a new subtitle dict with every cue moved by offset
    milliseconds and its index increased by offset_num

    subtitles is a dict in the form dict(num = dict(start, stop, text));
    neither the dict nor its cues are modified, each cue is copied
    before its times are changed
    '''
    sdict = {}
    for k in sorted([int(k) for k in subtitles]):
        # copy so the caller's cue keeps its original timecodes
        cue = dict(subtitles["%s" % k])
        cue['start'] = shift_time(offset, cue['start'])
        cue['stop'] = shift_time(offset, cue['stop'])
        sdict["%s" % (k + offset_num)] = cue
    return sdict
def merge_subtitles(subtitles):
    '''
    joins several subtitles into one srt subtitle

    subtitles maps a sortable key to dict(txt, length); parts are
    processed in sorted key order. Once the accumulated length is
    non-zero, each part's cues are shifted by that length and renumbered
    after the cues merged so far.
    presumably length is in milliseconds, as it is passed on as a time
    offset - verify against callers
    '''
    merged = {}
    elapsed = 0
    for name in sorted(subtitles):
        part = subtitles[name]
        cues = srt2dict(part['txt'])
        if elapsed:
            cues = shift_subtitles(elapsed, len(merged), cues)
        merged.update(cues)
        elapsed += part['length']
    return dict2srt(merged)
def split_subtitle(subtitles, offset):
    '''
    split subtitles at offset (milliseconds)

    subtitles is a dict in the form dict(num = dict(start, stop, text)).
    Returns a tuple (one, two): one holds the cues that stop before
    offset, unchanged; two holds the remaining cues, moved back by
    offset and renumbered so that they continue from 1.
    NOTE(review): a cue that straddles offset ends up in two with a
    start time before zero - confirm this is the intended handling
    '''
    one = {}
    two = {}
    for k in sorted([int(k) for k in subtitles]):
        key = "%s" % k
        cue = subtitles[key]
        # compare as milliseconds rather than as formatted strings
        if time_str2msec(cue['stop']) < offset:
            one[key] = cue
        else:
            two[key] = cue
    # the second half is renumbered by the number of cues that went
    # into the first half
    two = shift_subtitles(-offset, -len(one), two)
    return one, two
def detectEncoding(fp):
    '''guess the text encoding of an open file

    fp must be seekable and return bytes from read(). A byte order mark
    at the start of the file decides the encoding; without one the whole
    file is handed to chardet, and 'latin-1' is returned if chardet has
    no answer. The file position is the same on return as on entry.
    '''
    bomDict = { # bytepattern : name
        (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",
        (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
        (0xFE, 0xFF, None, None) : "utf_16_be",
        (0xFF, 0xFE, None, None) : "utf_16_le",
        (0xEF, 0xBB, 0xBF, None) : "utf_8",
    }

    # go to beginning of file and get the first 4 bytes
    oldFP = fp.tell()
    fp.seek(0)
    # iterating bytes yields 1-char strings on python 2 and ints on python 3
    head = [c if isinstance(c, int) else ord(c) for c in fp.read(4)]
    # files shorter than 4 bytes are padded so the lookups below still work
    head += [None] * (4 - len(head))
    byte1, byte2, byte3, byte4 = head

    # try bom detection using 4 bytes, 3 bytes, or 2 bytes
    bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
    if not bomDetection:
        bomDetection = bomDict.get((byte1, byte2, byte3, None))
    if not bomDetection:
        bomDetection = bomDict.get((byte1, byte2, None, None))

    ## if BOM detected, we're done :-)
    fp.seek(oldFP)
    if bomDetection:
        return bomDetection

    # more character detecting magick using http://chardet.feedparser.org/
    fp.seek(0)
    rawdata = fp.read()
    fp.seek(oldFP)
    encoding = chardet.detect(rawdata)['encoding']
    # chardet reports None when it cannot decide
    return encoding or 'latin-1'
def loadSrt(fname):
    '''read the file fname and return its content as a unicode string

    the encoding is taken from detectEncoding(); if the data cannot be
    decoded with it, latin-1 is used instead. A leading byte order mark
    is removed from the result.
    '''
    # binary mode: the raw bytes are needed for detection and decoding
    f = open(fname, 'rb')
    try:
        encoding = detectEncoding(f)
        data = f.read()
    finally:
        f.close()
    try:
        udata = data.decode(encoding)
    except (LookupError, TypeError, UnicodeError):
        # unknown, missing or wrong encoding; latin-1 maps every byte to
        # a character, so this decode cannot fail
        udata = data.decode('latin-1')
    if udata.startswith(u'\ufeff'):
        udata = udata[1:]
    return udata

View file

@ -6,8 +6,9 @@ import Image
import math import math
from StringIO import StringIO from StringIO import StringIO
from subtitles import time2ms
import oxdb_cache import oxdb_cache
from subtitles import srt2dict, time_str2msec
lineWidth = 600 lineWidth = 600
@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1):
mask = Image.new("RGBA", size, background) mask = Image.new("RGBA", size, background)
for subtitle in movie.overlay(query): for subtitle in movie.overlay(query):
start = int(round(time_str2msec(subtitle.start) / 1000)) start = int(round(time2ms(subtitle.start) / 1000))
stop = int(round(time_str2msec(subtitle.stop) / 1000)) stop = int(round(time2ms(subtitle.stop) / 1000))
if start < stop: if start < stop:
currentLine = math.ceil(start / lineWidth) currentLine = math.ceil(start / lineWidth)
if currentLine <= l: if currentLine <= l:
@ -93,8 +94,8 @@ def loadTimelineImageMap(movie):
imageMap ='<map name="timelineImageMap">' imageMap ='<map name="timelineImageMap">'
for key in sorted([int(k) for k in s]): for key in sorted([int(k) for k in s]):
sub = s["%s" % key] sub = s["%s" % key]
start = int(round(time_str2msec(sub['start']) / 1000)) start = int(round(time2ms(sub['start']) / 1000))
stop = int(round(time_str2msec(sub['stop']) / 1000)) stop = int(round(time2ms(sub['stop']) / 1000))
if start < stop: if start < stop:
currentLine = math.ceil(start / lineWidth) currentLine = math.ceil(start / lineWidth)
offset = int(currentLine * rowHeight + 4) offset = int(currentLine * rowHeight + 4)