migrate to pysubtitles, remove local outdated version
This commit is contained in:
parent
2e0b0605a2
commit
3f9407c944
4 changed files with 29 additions and 210 deletions
|
@ -8,7 +8,7 @@ from os.path import abspath, join, dirname
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from subtitles import *
|
from subtitles import time2ms
|
||||||
|
|
||||||
|
|
||||||
def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
|
def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, height=96, offset = 0):
|
||||||
|
@ -23,8 +23,8 @@ def extract_flash_ng(movie_file, flash_file, inpoint, outpoint, width=128, heigh
|
||||||
print " becomes ", inpoint
|
print " becomes ", inpoint
|
||||||
|
|
||||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||||
duration = time_str2msec(outpoint) - time_str2msec(inpoint)
|
duration = time2ms(outpoint) - time2ms(inpoint)
|
||||||
inpoint = time_str2msec(inpoint)
|
inpoint = time2ms(inpoint)
|
||||||
extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
|
extractClipScript = abspath(join(dirname(__file__), "tools/extract_clip.py"))
|
||||||
|
|
||||||
cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
|
cmd = '''%s "%s" %s %s %s''' % (extractClipScript, movie_file, flash_file, inpoint, duration)
|
||||||
|
@ -46,7 +46,7 @@ def extract_flash(movie_file, flash_file, inpoint, outpoint, width=128, height=9
|
||||||
outpoint = shift_time(-offset, outpoint)
|
outpoint = shift_time(-offset, outpoint)
|
||||||
print " becomes ", inpoint
|
print " becomes ", inpoint
|
||||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||||
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
|
outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
|
||||||
|
|
||||||
audiorate = "44100"
|
audiorate = "44100"
|
||||||
if os.path.exists(movie_file):
|
if os.path.exists(movie_file):
|
||||||
|
@ -87,7 +87,7 @@ def extract_ogg(movie_file, clip_file, inpoint, outpoint, width=128, height=96,
|
||||||
outpoint = shift_time(-offset, outpoint)
|
outpoint = shift_time(-offset, outpoint)
|
||||||
print " becomes ", inpoint
|
print " becomes ", inpoint
|
||||||
print "extracting %s -> %s" % (inpoint, outpoint)
|
print "extracting %s -> %s" % (inpoint, outpoint)
|
||||||
outpoint = float(time_str2msec(outpoint) - time_str2msec(inpoint)) / 1000 + 1
|
outpoint = float(time2ms(outpoint) - time2ms(inpoint)) / 1000 + 1
|
||||||
|
|
||||||
audiorate = "44100"
|
audiorate = "44100"
|
||||||
if os.path.exists(movie_file):
|
if os.path.exists(movie_file):
|
||||||
|
@ -153,7 +153,7 @@ def extract_poster_still(movie_file, png_file, inpoint):
|
||||||
if ext in ('sub', 'srt'):
|
if ext in ('sub', 'srt'):
|
||||||
print "this is not a movie file, will not try to extract frames"
|
print "this is not a movie file, will not try to extract frames"
|
||||||
return
|
return
|
||||||
inpoint = time_str2msec(inpoint)
|
inpoint = time2ms(inpoint)
|
||||||
extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
|
extractClipScript = abspath(join(dirname(__file__), "tools/extract_frame.py"))
|
||||||
|
|
||||||
cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)
|
cmd = '''%s "%s" "%s" %s 0 128''' % (extractClipScript, movie_file, png_file, inpoint)
|
||||||
|
|
|
@ -17,14 +17,13 @@ from glob import glob
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
|
||||||
import simplejson
|
import simplejson
|
||||||
from scrapeit.utils import read_url
|
from scrapeit.utils import read_url
|
||||||
|
import subtitles
|
||||||
|
|
||||||
import cache
|
import cache
|
||||||
import oxdb_import
|
import oxdb_import
|
||||||
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
|
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
|
||||||
from subtitles import *
|
|
||||||
from extract import *
|
from extract import *
|
||||||
import midentify
|
import midentify
|
||||||
|
|
||||||
|
@ -412,20 +411,20 @@ class ArchiveFile(SQLObject):
|
||||||
def _startPoints(self):
|
def _startPoints(self):
|
||||||
inpoints = []
|
inpoints = []
|
||||||
if self.srt:
|
if self.srt:
|
||||||
d = srt2dict(self.srt)
|
d = subtitles.Subtitle(self.srt)
|
||||||
for s in d.values():
|
for s in d:
|
||||||
inpoints.append(s['start'])
|
inpoints.append(s['in'])
|
||||||
elif self.length:
|
elif self.length:
|
||||||
minutes = int((float(self.length) / 1000) / 60)
|
minutes = int((float(self.length) / 1000) / 60)
|
||||||
minute = 1000 * 60
|
minute = 1000 * 60
|
||||||
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
|
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
|
||||||
return inpoints
|
return inpoints
|
||||||
|
|
||||||
def _findSubtitleByStart(self, start):
|
def _findSubtitleByInPoint(self, inpoint):
|
||||||
if self.srt:
|
if self.srt:
|
||||||
d = srt2dict(self.srt)
|
d = subtitles.Subtitle(self.srt)
|
||||||
for s in d.values():
|
for s in d:
|
||||||
if s['start'] == start:
|
if s['in'] == inpoint:
|
||||||
return s
|
return s
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -449,11 +448,11 @@ class ArchiveFile(SQLObject):
|
||||||
height = height - height % 2
|
height = height - height % 2
|
||||||
inpoint = inpoint.replace('.', ':')
|
inpoint = inpoint.replace('.', ':')
|
||||||
if outpoint == -1:
|
if outpoint == -1:
|
||||||
s = self._findSubtitleByStart(inpoint)
|
s = self._findSubtitleByInPoint(inpoint)
|
||||||
if s:
|
if s:
|
||||||
outpoint = s['stop']
|
outpoint = s['out']
|
||||||
else:
|
else:
|
||||||
outpoint = shift_time(5000, inpoint)
|
outpoint = subtitles.shiftTime(5000, inpoint)
|
||||||
else:
|
else:
|
||||||
outpoint = outpoint.replace('.', ':')
|
outpoint = outpoint.replace('.', ':')
|
||||||
extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
|
extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
|
||||||
|
@ -470,11 +469,11 @@ class ArchiveFile(SQLObject):
|
||||||
height = height - height % 2
|
height = height - height % 2
|
||||||
inpoint = inpoint.replace('.', ':')
|
inpoint = inpoint.replace('.', ':')
|
||||||
if outpoint == -1:
|
if outpoint == -1:
|
||||||
s = self._findSubtitleByStart(inpoint)
|
s = self._findSubtitleByInPoint(inpoint)
|
||||||
if s:
|
if s:
|
||||||
outpoint = s['stop']
|
outpoint = s['out']
|
||||||
else:
|
else:
|
||||||
outpoint = shift_time(5000, inpoint)
|
outpoint = subtitles.shiftTime(5000, inpoint)
|
||||||
else:
|
else:
|
||||||
outpoint = outpoint.replace('.', ':')
|
outpoint = outpoint.replace('.', ':')
|
||||||
extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
|
extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
|
||||||
|
@ -585,7 +584,7 @@ class ArchiveFile(SQLObject):
|
||||||
if not subtitle.absolutePath.endswith('.srt'):
|
if not subtitle.absolutePath.endswith('.srt'):
|
||||||
debug("this is not a subtitle %s" % subtitle.absolutePath)
|
debug("this is not a subtitle %s" % subtitle.absolutePath)
|
||||||
return
|
return
|
||||||
movieFile.srt = loadSrt(subtitle.absolutePath)
|
movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
|
||||||
|
|
||||||
def _set_fps(self, fps):
|
def _set_fps(self, fps):
|
||||||
fps = int(fps * 10000)
|
fps = int(fps * 10000)
|
||||||
|
|
|
@ -1,181 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# -*- Mode: Python; -*-
|
|
||||||
# vi:si:et:sw=2:sts=2:ts=2
|
|
||||||
|
|
||||||
import re
|
|
||||||
import os
|
|
||||||
from os.path import abspath, join, dirname
|
|
||||||
import shutil
|
|
||||||
import time
|
|
||||||
|
|
||||||
import chardet
|
|
||||||
|
|
||||||
# Image file extension; no function in this module reads it.
# NOTE(review): presumably consumed by modules that do
# `from subtitles import *` -- confirm before removing.
img_extension = 'jpg'
|
|
||||||
|
|
||||||
def srt2txt(srt, encoding = "utf-8"):
  '''return only the text of the cues found in an srt string

     the cues are taken in numeric order, each one followed by an empty
     line; leading and trailing whitespace of the result is removed
  '''
  cues = srt2dict(srt, encoding)
  numbers = [int(number) for number in cues]
  numbers.sort()
  paragraphs = ["%s\n\n" % cues["%s" % number]['text'] for number in numbers]
  return ''.join(paragraphs).strip()
|
|
||||||
|
|
||||||
def srt2dict(srt, encoding = "utf-8"):
  '''convert srt string into a dict in the form
     dict(num = dict(start, stop, text))

     num is the cue number as a decimal string ("1", "2", ...),
     start and stop are normalized to HH:MM:SS,mmm and text holds the
     cue lines joined with newlines.

     cues need a number line, a timing line and at least one text line.
     cues whose number is not an integer or whose timing line has no
     "-->" are skipped, so one broken cue does not abort the whole parse.

     encoding is accepted for interface compatibility and is not used.
  '''
  def normalize(stamp):
    # keep only the timestamp itself (drops trailing position hints) and
    # accept any single-character separators, e.g. 00.01.02.345
    stamp = stamp.strip().split(' ')[0]
    return re.sub(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})', r'\1:\2:\3,\4', stamp)

  subdict = {}
  # cues are separated by empty lines; carriage returns are dropped first
  # so that \r\n and \n line endings behave the same
  for block in srt.replace('\r', '').strip().split('\n\n'):
    lines = block.strip().split('\n')
    if len(lines) < 3:
      continue
    timing = lines[1].split('-->')
    if len(timing) < 2:
      continue
    try:
      # int() round trip normalizes numbers like "01" or " 1 " to "1"
      number = "%s" % int(lines[0])
    except ValueError:
      continue
    subdict[number] = {
      'start': normalize(timing[0]),
      'stop': normalize(timing[1]),
      'text': u'\n'.join(lines[2:]),
    }
  return subdict
|
|
||||||
|
|
||||||
def dict2srt(subtitles, encoding = "utf-8"):
  '''serialize a dict in the form dict(num = dict(start, stop, text))
     into srt data

     cues are written in numeric order with \\r\\n line endings; the
     result is stripped and returned encoded with the given encoding
  '''
  template = "%s\r\n%s --> %s\r\n%s\r\n\r\n"
  numbers = [int(key) for key in subtitles]
  numbers.sort()
  chunks = []
  for number in numbers:
    key = "%s" % number
    cue = subtitles[key]
    chunks.append(template % (key, cue['start'], cue['stop'], cue['text']))
  return ''.join(chunks).strip().encode(encoding)
|
|
||||||
|
|
||||||
def time_str2msec(time_string):
  '''convert a time string into milliseconds

     time_string is HH:MM:SS optionally followed by ",mmm"; any single
     character is accepted as separator in the full HH?MM?SS?mmm form
     (e.g. 01.50.52.123). The part after the comma is read as a decimal
     fraction of a second, so ",5" means 500 milliseconds.

     returns the offset from 00:00:00 as an int.
     raises ValueError if time_string is not in the expected format.
  '''
  time_string = re.sub(r'(\d{2}).(\d{2}).(\d{2}).(\d{3})', r'\1:\2:\3,\4', time_string)
  parts = time_string.split(',')
  msec = 0
  if len(parts) > 1:
    # round instead of truncating: 0.001 * 1000 must not become 0
    msec = int(round(float("0." + parts[-1]) * 1000))
  fields = parts[0].strip().split(':')
  if len(fields) != 3:
    raise ValueError("time data %r does not match format HH:MM:SS[,mmm]" % time_string)
  hours, minutes, seconds = [int(field) for field in fields]
  # plain integer arithmetic: exact to the millisecond, independent of
  # the local timezone and not limited to 23 hours
  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + msec
|
|
||||||
|
|
||||||
def msec2time_str(msec):
  '''convert milliseconds into a time string in the form HH:MM:SS,mmm

     msec may be an int, a float or a numeric string.
     every field is zero padded, so 5 becomes 00:00:00,005.
     hours are not wrapped at 24; values below zero are clamped to
     00:00:00,000.
  '''
  total = max(int(round(float(msec))), 0)
  seconds, ms = divmod(total, 1000)
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, ms)
|
|
||||||
|
|
||||||
def shift_time(offset, time_string):
  ''' return time_string moved by offset milliseconds

      time_string is expected to look like 01:50:52,123 and the result
      is formatted by msec2time_str
  '''
  return msec2time_str(time_str2msec(time_string) + offset)
|
|
||||||
|
|
||||||
def shift_subtitles(offset, offset_num, subtitles):
  '''
    shift every cue by offset milliseconds and renumber it by offset_num

    subtitles is a dict in the form dict(num = dict(start, stop, text)).
    returns a new dict of the same form; the cue dicts that were passed
    in are copied, not modified.
  '''
  sdict = {}
  for k in sorted([int(k) for k in subtitles]):
    # shallow copy so the caller keeps its original start/stop values
    cue = dict(subtitles["%s" % k])
    cue['start'] = shift_time(offset, cue['start'])
    cue['stop'] = shift_time(offset, cue['stop'])
    sdict["%s" % (k + offset_num)] = cue
  return sdict
|
|
||||||
|
|
||||||
def merge_subtitles(subtitles):
  '''
    join several subtitle parts into one srt subtitle

    subtitles maps a sortable key to dict(txt, length): txt is the srt
    string of the part, length is added to the time offset applied to
    all following parts. Parts are processed in sorted key order; once
    the offset is non-zero, cues are shifted by it and renumbered by
    the number of cues collected so far.
  '''
  merged = {}
  elapsed = 0
  for name in sorted(subtitles):
    part = subtitles[name]
    cues = srt2dict(part['txt'])
    if elapsed:
      cues = shift_subtitles(elapsed, len(merged), cues)
    merged.update(cues)
    elapsed += part['length']
  return dict2srt(merged)
|
|
||||||
|
|
||||||
def split_subtitle(subtitles, offset):
  '''
    split subtitles at offset

    subtitles is a dict in the form dict(num = dict(start, stop, text)),
    offset is the split point in milliseconds.

    returns a tuple (one, two) of dicts in the same form: one holds the
    cues that stop before offset, unchanged; two holds all other cues,
    shifted back by offset and renumbered by the size of one.
  '''
  one = {}
  two = {}
  for k in sorted([int(k) for k in subtitles]):
    key = "%s" % k
    # compare as milliseconds, not as strings
    if time_str2msec(subtitles[key]['stop']) < offset:
      one[key] = subtitles[key]
    else:
      two[key] = subtitles[key]
  # NOTE(review): a cue that starts before offset but stops after it lands
  # in two with a start time before zero - confirm how callers want that
  # handled.
  two = shift_subtitles(-offset, -len(one), two)
  return one, two
|
|
||||||
|
|
||||||
def detectEncoding(fp):
  '''guess the text encoding of an open file

     fp must be a seekable file object opened for reading bytes; its
     position is restored before returning.

     a byte order mark at the start of the file decides the encoding.
     without one the whole file is handed to chardet, and 'latin-1' is
     returned if chardet reports no encoding.

     returns the name of the encoding as a string.
  '''
  bomDict={ # bytepattern : name
    (0x00, 0x00, 0xFE, 0xFF) : "utf_32_be",
    (0xFF, 0xFE, 0x00, 0x00) : "utf_32_le",
    (0xFE, 0xFF, None, None) : "utf_16_be",
    (0xFF, 0xFE, None, None) : "utf_16_le",
    (0xEF, 0xBB, 0xBF, None) : "utf_8",
  }

  oldFP = fp.tell()
  try:
    # go to beginning of file and get the first 4 bytes; files shorter
    # than that are padded with None so they can still be looked up
    fp.seek(0)
    header = list(bytearray(fp.read(4)))
    header += [None] * (4 - len(header))

    # try bom detection using 4 bytes, 3 bytes, or 2 bytes
    for size in (4, 3, 2):
      pattern = tuple(header[:size]) + (None,) * (4 - size)
      bomDetection = bomDict.get(pattern)
      if bomDetection:
        ## if BOM detected, we're done :-)
        return bomDetection

    #more character detecting magick using http://chardet.feedparser.org/
    fp.seek(0)
    rawdata = fp.read()
  finally:
    fp.seek(oldFP)
  return chardet.detect(rawdata)['encoding'] or 'latin-1'
|
|
||||||
|
|
||||||
def loadSrt(fname):
  '''read the subtitle file fname and return its content as unicode

     the encoding is taken from detectEncoding; if the data can not be
     decoded with it, latin-1 is used instead. A leading byte order
     mark is removed from the result.
  '''
  # read raw bytes; the handle is closed even if detection or read fails
  f = open(fname, 'rb')
  try:
    encoding = detectEncoding(f)
    data = f.read()
  finally:
    f.close()
  try:
    udata = data.decode(encoding)
  except (LookupError, TypeError, UnicodeError):
    # unknown or missing encoding name, or undecodable data.
    # latin-1 maps every byte to a character, so this can not fail
    udata = data.decode('latin-1')
  if udata.startswith(u'\ufeff'):
    udata = udata[1:]
  return udata
|
|
||||||
|
|
|
@ -6,8 +6,9 @@ import Image
|
||||||
import math
|
import math
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
|
from subtitles import time2ms
|
||||||
|
|
||||||
import oxdb_cache
|
import oxdb_cache
|
||||||
from subtitles import srt2dict, time_str2msec
|
|
||||||
|
|
||||||
|
|
||||||
lineWidth = 600
|
lineWidth = 600
|
||||||
|
@ -61,8 +62,8 @@ def loadTimelineOverlay(movie, query, lines = -1):
|
||||||
mask = Image.new("RGBA", size, background)
|
mask = Image.new("RGBA", size, background)
|
||||||
|
|
||||||
for subtitle in movie.overlay(query):
|
for subtitle in movie.overlay(query):
|
||||||
start = int(round(time_str2msec(subtitle.start) / 1000))
|
start = int(round(time2ms(subtitle.start) / 1000))
|
||||||
stop = int(round(time_str2msec(subtitle.stop) / 1000))
|
stop = int(round(time2ms(subtitle.stop) / 1000))
|
||||||
if start < stop:
|
if start < stop:
|
||||||
currentLine = math.ceil(start / lineWidth)
|
currentLine = math.ceil(start / lineWidth)
|
||||||
if currentLine <= l:
|
if currentLine <= l:
|
||||||
|
@ -93,8 +94,8 @@ def loadTimelineImageMap(movie):
|
||||||
imageMap ='<map name="timelineImageMap">'
|
imageMap ='<map name="timelineImageMap">'
|
||||||
for key in sorted([int(k) for k in s]):
|
for key in sorted([int(k) for k in s]):
|
||||||
sub = s["%s" % key]
|
sub = s["%s" % key]
|
||||||
start = int(round(time_str2msec(sub['start']) / 1000))
|
start = int(round(time2ms(sub['start']) / 1000))
|
||||||
stop = int(round(time_str2msec(sub['stop']) / 1000))
|
stop = int(round(time2ms(sub['stop']) / 1000))
|
||||||
if start < stop:
|
if start < stop:
|
||||||
currentLine = math.ceil(start / lineWidth)
|
currentLine = math.ceil(start / lineWidth)
|
||||||
offset = int(currentLine * rowHeight + 4)
|
offset = int(currentLine * rowHeight + 4)
|
||||||
|
|
Loading…
Reference in a new issue