migrate to pysubtitles, remove local outdated version
This commit is contained in:
parent
2e0b0605a2
commit
3f9407c944
4 changed files with 29 additions and 210 deletions
|
@ -8,7 +8,7 @@ from os.path import abspath, join, dirname
|
|||
import shutil
|
||||
import time
|
||||
|
||||
from subtitles import *
|
||||
from subtitles import time2ms
|
||||
|
||||
|
||||
def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
|
||||
|
@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh
|
|||
print " becomes ", inpoint
|
||||
|
||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||
duration = time_str2msec(outpoint) - time_str2msec(inpoint)
|
||||
inpoint = time_str2msec(inpoint)
|
||||
duration = time2ms(outpoint) - time2ms(inpoint)
|
||||
inpoint = time2ms(inpoint)
|
||||
extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
|
||||
|
||||
cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
|
||||
|
@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9
|
|||
outpoint = shift_time(-offset, outpoint)
|
||||
print " becomes ", inpoint
|
||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
|
||||
outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
|
||||
|
||||
audiorate = "44100"
|
||||
if os.path.exists(movie_file):
|
||||
|
@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96,
|
|||
outpoint = shift_time(-offset, outpoint)
|
||||
print " becomes ", inpoint
|
||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
|
||||
outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
|
||||
|
||||
audiorate = "44100"
|
||||
if os.path.exists(movie_file):
|
||||
|
@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint):
|
|||
if ext in ('sub', 'srt'):
|
||||
print "this is not a movie file, will not try to extract frames"
|
||||
return
|
||||
inpoint = time_str2msec(inpoint)
|
||||
inpoint = time2ms(inpoint)
|
||||
extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
|
||||
|
||||
cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)
|
||||
|
|
|
@ -17,14 +17,13 @@ from glob import glob
|
|||
import shutil
|
||||
import socket
|
||||
|
||||
|
||||
import simplejson
|
||||
from scrapeit.utils import read_url
|
||||
import subtitles
|
||||
|
||||
import cache
|
||||
import oxdb_import
|
||||
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
|
||||
from subtitles import *
|
||||
from extract import *
|
||||
import midentify
|
||||
|
||||
|
@ -412,20 +411,20 @@ class ArchiveFile(SQLObject):
|
|||
def _startPoints(self):
|
||||
inpoints = []
|
||||
if self.srt:
|
||||
d = srt2dict(self.srt)
|
||||
for s in d.values():
|
||||
inpoints.append(s['start'])
|
||||
d = subtitles.Subtitle(self.srt)
|
||||
for s in d:
|
||||
inpoints.append(s['in'])
|
||||
elif self.length:
|
||||
minutes = int((float(self.length) / 1000) / 60)
|
||||
minute = 1000 * 60
|
||||
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
|
||||
return inpoints
|
||||
|
||||
def _findSubtitleByStart(self, start):
|
||||
def _findSubtitleByInPoint(self, inpoint):
|
||||
if self.srt:
|
||||
d = srt2dict(self.srt)
|
||||
for s in d.values():
|
||||
if s['start'] == start:
|
||||
d = subtitles.Subtitle(self.srt)
|
||||
for s in d:
|
||||
if s['in'] == inpoint:
|
||||
return s
|
||||
return None
|
||||
|
||||
|
@ -449,11 +448,11 @@ class ArchiveFile(SQLObject):
|
|||
height = height - height % 2
|
||||
inpoint = inpoint.replace('.', ':')
|
||||
if outpoint == -1:
|
||||
s = self._findSubtitleByStart(inpoint)
|
||||
s = self._findSubtitleByInPoint(inpoint)
|
||||
if s:
|
||||
outpoint = s['stop']
|
||||
outpoint = s['out']
|
||||
else:
|
||||
outpoint = shift_time(5000, inpoint)
|
||||
outpoint = subtitles.shiftTime(5000, inpoint)
|
||||
else:
|
||||
outpoint = outpoint.replace('.', ':')
|
||||
extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
|
||||
|
@ -470,11 +469,11 @@ class ArchiveFile(SQLObject):
|
|||
height = height - height % 2
|
||||
inpoint = inpoint.replace('.', ':')
|
||||
if outpoint == -1:
|
||||
s = self._findSubtitleByStart(inpoint)
|
||||
s = self._findSubtitleByInPoint(inpoint)
|
||||
if s:
|
||||
outpoint = s['stop']
|
||||
outpoint = s['out']
|
||||
else:
|
||||
outpoint = shift_time(5000, inpoint)
|
||||
outpoint = subtitles.shiftTime(5000, inpoint)
|
||||
else:
|
||||
outpoint = outpoint.replace('.', ':')
|
||||
extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
|
||||
|
@ -585,7 +584,7 @@ class ArchiveFile(SQLObject):
|
|||
if not subtitle.absolutePath.endswith('.srt'):
|
||||
debug("this is not a subtitle %s" % subtitle.absolutePath)
|
||||
return
|
||||
movieFile.srt = loadSrt(subtitle.absolutePath)
|
||||
movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
|
||||
|
||||
def _set_fps(self, fps):
|
||||
fps = int(fps * 10000)
|
||||
|
|
|
@ -1,181 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- Mode: Python; -*-
|
||||
# vi:si:et:sw=2:sts=2:ts=2
|
||||
|
||||
import re
|
||||
import os
|
||||
from os.path import abspath, join, dirname
|
||||
import shutil
|
||||
import time
|
||||
|
||||
import chardet
|
||||
|
||||
# Image file extension. No function in this module reads it; presumably it is
# picked up by modules that do `from subtitles import *` (TODO confirm).
img_extension = 'jpg'
|
||||
|
||||
def srt2txt(srt, encoding = "utf-8"):
    '''Return only the spoken text contained in an srt string.

    The srt data is parsed with srt2dict. The text of every entry is
    emitted in ascending numeric order of its index, entries are
    separated by an empty line and surrounding whitespace is stripped
    from the result.
    '''
    entries = srt2dict(srt, encoding)
    numbers = [int(number) for number in entries]
    numbers.sort()
    chunks = ["%s\n\n" % entries["%s" % number]['text'] for number in numbers]
    return ''.join(chunks).strip()
|
||||
|
||||
def srt2dict(srt, encoding = "utf-8"):
    '''Convert an srt string into a dict in the form
    dict(num = dict(start, stop, text))

    num is the subtitle index as a string without leading zeros. start
    and stop are normalized to HH:MM:SS,mmm whatever separators the
    source used, and anything following a timestamp on the timing line
    (e.g. position hints) is dropped.

    Blocks that are not a complete subtitle are skipped: fewer than three
    lines, a non-numeric index line, or a timing line without "-->".

    encoding is accepted for interface compatibility only; the function
    never reads it.
    '''
    timestamp = re.compile(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})')

    def normalize(stamp):
        # keep only the first token, then force the canonical separators
        stamp = stamp.strip().split(' ')[0]
        return timestamp.sub(r'\1:\2:\3,\4', stamp)

    subdict = {}
    for block in srt.replace('\r', '').strip().split('\n\n'):
        lines = block.strip().split('\n')
        if len(lines) <= 2:
            continue
        timing = lines[1].split('-->')
        if len(timing) < 2:
            # no start/stop pair, this is not a subtitle block
            continue
        try:
            number = "%s" % int(lines[0])
        except ValueError:
            # one malformed index line must not abort the whole parse
            continue
        subdict[number] = {
            'start': normalize(timing[0]),
            'stop': normalize(timing[1]),
            'text': u'\n'.join(lines[2:]),
        }
    return subdict
|
||||
|
||||
def dict2srt(subtitles, encoding = "utf-8"):
    '''Serialize a subtitle dict into srt data.

    subtitles maps the subtitle index (as a string) to a dict with the
    keys 'start', 'stop' and 'text'. Entries are written in ascending
    numeric order with CRLF line endings; the result is stripped of
    surrounding whitespace and returned encoded with encoding.
    '''
    numbers = [int(number) for number in subtitles]
    numbers.sort()
    blocks = []
    for number in numbers:
        key = "%s" % number
        entry = subtitles[key]
        blocks.append("%s\r\n%s --> %s\r\n%s\r\n\r\n" % (
            key, entry['start'], entry['stop'], entry['text']))
    return ''.join(blocks).strip().encode(encoding)
|
||||
|
||||
def time_str2msec(time_string):
    '''Convert a time string such as 01:50:52,123 into milliseconds.

    Any single character is accepted as separator in the full
    HH?MM?SS?mmm form (e.g. 01.50.52.123); it is normalized first. The
    millisecond part is optional, so 01:50:52 is valid too. Digits after
    the comma are read as a decimal fraction of a second: ",5" means
    500 ms.

    The value is computed with integer arithmetic only. Going through
    time.mktime() and floats, as this function used to, can lose a
    millisecond to rounding and depends on the local timezone.

    Raises ValueError if the string does not have three numeric
    HH:MM:SS fields. Field values are not range-checked.
    '''
    time_string = re.sub(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})', r'\1:\2:\3,\4', time_string)
    parts = time_string.split(',')
    if len(parts) > 1:
        # pad or cut the fraction to exactly three digits (milliseconds)
        msec = int((parts[-1].strip() + '000')[:3])
    else:
        msec = 0
    hours, minutes, seconds = [int(value) for value in parts[0].split(':')]
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + msec
|
||||
|
||||
def msec2time_str(msec):
    '''Convert milliseconds into a time string in the form 01:50:52,123.

    The millisecond field is always zero-padded to three digits. It used
    to be sliced from the decimal representation of msec, which turned 0
    into "00:00:00,0" and broke for float input. msec is truncated to an
    integer before formatting.
    '''
    import time
    msec = int(msec)
    clock = time.strftime("%H:%M:%S", time.gmtime(msec // 1000))
    return clock + ",%03d" % (msec % 1000)
|
||||
|
||||
def shift_time(offset, time_string):
    '''Shift a time string by offset milliseconds.

    time_string is expected in the form 01:50:52,123; the shifted time
    is returned in the same form.
    '''
    shifted_msec = offset + time_str2msec(time_string)
    return msec2time_str(shifted_msec)
|
||||
|
||||
def shift_subtitles(offset, offset_num, subtitles):
    '''Return a shifted and renumbered copy of subtitles.

    offset      -- milliseconds added to every start and stop time
    offset_num  -- number added to every subtitle index
    subtitles   -- dict in the form dict(num = dict(start, stop, text)),
                   keyed by the index as a string

    The entries of the passed-in dict are copied, not modified, so the
    caller's data keeps its original times.
    '''
    sdict = {}
    for k in sorted([int(k) for k in subtitles]):
        # work on a copy: the result must not share entries with the input
        entry = dict(subtitles["%s" % k])
        entry['start'] = shift_time(offset, entry['start'])
        entry['stop'] = shift_time(offset, entry['stop'])
        sdict["%s" % (k + offset_num)] = entry
    return sdict
|
||||
|
||||
def merge_subtitles(subtitles):
    '''Merge several srt subtitles into one srt subtitle.

    subtitles is indexed by sortable keys; each value is a dict with
    'txt' (the srt data of one part) and 'length' (duration of that
    part, added to the running time offset, so presumably milliseconds
    as expected by shift_subtitles -- confirm against callers).

    The parts are processed in sorted key order. Every part after the
    first is shifted by the accumulated length of the parts before it
    and renumbered to continue after the highest index used so far, so
    parts with gaps in their numbering or with a length of 0 cannot
    overwrite earlier entries.

    Returns the merged subtitles as produced by dict2srt.
    '''
    subs = {}
    offset = 0
    for k in sorted(subtitles):
        sdict = srt2dict(subtitles[k]['txt'])
        if subs or offset:
            # len(subs) is only the right base for gap-free numbering
            highest = 0
            if subs:
                highest = max([int(key) for key in subs])
            sdict = shift_subtitles(offset, highest, sdict)
        subs.update(sdict)
        offset += subtitles[k]['length']
    return dict2srt(subs)
|
||||
|
||||
def split_subtitle(subtitles, offset):
    '''Split subtitles at offset.

    subtitles -- dict in the form dict(num = dict(start, stop, text)),
                 keyed by the index as a string
    offset    -- split position in milliseconds
                 NOTE(review): unit inferred from how the offset was
                 handed on to shift_subtitles -- confirm with callers.

    Returns a tuple (one, two) of dicts in the same form:
    one -- the subtitles that stop before offset, unchanged
    two -- the remaining subtitles, numbered from 1 and shifted back by
           offset; a subtitle that is on screen at the split point gets
           a start time of 00:00:00,000

    The passed-in dict and its entries are not modified.
    '''
    one = {}
    two = {}
    for k in sorted([int(k) for k in subtitles]):
        key = "%s" % k
        entry = subtitles[key]
        stop = time_str2msec(entry['stop'])
        if stop < offset:
            one[key] = entry
            continue
        shifted = dict(entry)
        # clamp: a subtitle spanning the split point would start below zero
        shifted['start'] = msec2time_str(max(0, time_str2msec(entry['start']) - offset))
        shifted['stop'] = msec2time_str(stop - offset)
        two["%s" % (len(two) + 1)] = shifted
    return one, two
|
||||
|
||||
def detectEncoding(fp):
    '''Guess the text encoding of the open file fp.

    fp must yield raw bytes (binary mode). A byte order mark at the
    start of the file decides the encoding; without one the whole file
    is handed to chardet (http://chardet.feedparser.org/). If chardet
    cannot make a guess, e.g. for an empty file, 'latin-1' is returned.

    Files shorter than four bytes are handled. The file position is
    restored before returning, also when reading fails.

    Returns the codec name as a string.
    '''
    import codecs
    # longest marks first: the UTF-32 LE mark begins with the UTF-16 LE one
    boms = (
        (codecs.BOM_UTF32_BE, "utf_32_be"),
        (codecs.BOM_UTF32_LE, "utf_32_le"),
        (codecs.BOM_UTF16_BE, "utf_16_be"),
        (codecs.BOM_UTF16_LE, "utf_16_le"),
        (codecs.BOM_UTF8, "utf_8"),
    )

    oldFP = fp.tell()
    try:
        fp.seek(0)
        head = fp.read(4)
        for bom, name in boms:
            if head.startswith(bom):
                return name
        # no BOM, let chardet look at the complete data
        fp.seek(0)
        rawdata = fp.read()
    finally:
        fp.seek(oldFP)
    return chardet.detect(rawdata)['encoding'] or 'latin-1'
|
||||
|
||||
def loadSrt(fname):
    '''Read the subtitle file fname and return its content as unicode.

    The encoding is guessed with detectEncoding. If decoding with the
    guessed encoding fails (unknown codec, no guess, undecodable bytes)
    the data is decoded as latin-1 instead, which accepts any byte
    sequence. A leading byte order mark is removed from the result.

    The file is opened in binary mode because the encoding is sniffed
    from the raw bytes, and it is closed even if reading fails.
    '''
    f = open(fname, 'rb')
    try:
        encoding = detectEncoding(f)
        data = f.read()
    finally:
        f.close()
    try:
        udata = data.decode(encoding)
    except (LookupError, TypeError, UnicodeError):
        # latin-1 maps every byte to a character, so this cannot fail
        udata = data.decode('latin-1')
    if udata.startswith(u'\ufeff'):
        udata = udata[1:]
    return udata
|
||||
|
|
@ -6,8 +6,9 @@ import Image
|
|||
import math
|
||||
from StringIO import StringIO
|
||||
|
||||
from subtitles import time2ms
|
||||
|
||||
import oxdb_cache
|
||||
from subtitles import srt2dict, time_str2msec
|
||||
|
||||
|
||||
lineWidth = 600
|
||||
|
@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1):
|
|||
mask = Image.new("RGBA", size, background)
|
||||
|
||||
for subtitle in movie.overlay(query):
|
||||
start = int(round(time_str2msec(subtitle.start) / 1000))
|
||||
stop = int(round(time_str2msec(subtitle.stop) / 1000))
|
||||
start = int(round(time2ms(subtitle.start) / 1000))
|
||||
stop = int(round(time2ms(subtitle.stop) / 1000))
|
||||
if start < stop:
|
||||
currentLine = math.ceil(start / lineWidth)
|
||||
if currentLine <= l:
|
||||
|
@ -93,8 +94,8 @@ def loadTimelineImageMap(movie):
|
|||
imageMap ='<map name="timelineImageMap">'
|
||||
for key in sorted([int(k) for k in s]):
|
||||
sub = s["%s" % key]
|
||||
start = int(round(time_str2msec(sub['start']) / 1000))
|
||||
stop = int(round(time_str2msec(sub['stop']) / 1000))
|
||||
start = int(round(time2ms(sub['start']) / 1000))
|
||||
stop = int(round(time2ms(sub['stop']) / 1000))
|
||||
if start < stop:
|
||||
currentLine = math.ceil(start / lineWidth)
|
||||
offset = int(currentLine * rowHeight + 4)
|
||||
|
|
Loading…
Reference in a new issue