From ba5e4af355b9a4e5c38be22bff9eef11539ecba2 Mon Sep 17 00:00:00 2001
From: j
Date: Thu, 9 Aug 2018 18:54:49 +0000
Subject: [PATCH] chop/encode mp4 and include subtitles

---
Review notes (ignored by git am, not part of the commit message):
- Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch.  The indentation of unchanged context lines in
  extract.py and views.py is a best guess; use
  `git apply --ignore-whitespace` if a hunk does not match.
- chop.py carries review fixes and comments, so it no longer matches the
  blob id 3720bb28c recorded below.

 pandora/archive/chop.py    | 259 +++++++++++++++++++++++++++++++++++++
 pandora/archive/extract.py |  46 +++++----
 pandora/item/views.py      |  10 +-
 3 files changed, 297 insertions(+), 18 deletions(-)
 create mode 100644 pandora/archive/chop.py

diff --git a/pandora/archive/chop.py b/pandora/archive/chop.py
new file mode 100644
index 000000000..3720bb28c
--- /dev/null
+++ b/pandora/archive/chop.py
@@ -0,0 +1,259 @@
+import json
+import os
+import subprocess
+from bisect import bisect_left
+
+import ox
+
+class Chop(object):
+    """Cut a time range out of a video file with ffmpeg.
+
+    The clip is assembled from up to three parts: a re-encoded part from
+    ``start`` up to a keyframe, a stream-copied middle part, and a
+    re-encoded part after the last keyframe before ``end``.  All work is
+    done in ``__init__``.
+    """
+
+    # class-level defaults; instances rebind these, they never mutate them
+    keyframes = []
+    subtitles = None
+    info = {}
+    # common ffmpeg prefix: quiet output, overwrite existing files
+    ffmpeg = [
+        'ffmpeg',
+        '-nostats', '-loglevel', 'error',
+        '-y',
+    ]
+
+    def __init__(self, source, output, start, end, subtitles=None):
+        """Write the clip of ``source`` from ``start`` to ``end`` into ``output``.
+
+        ``start`` and ``end`` are passed to ffmpeg as seconds.  ``subtitles``
+        is an optional path to a subtitle file; its matching range is cut
+        out and muxed into the clip.
+
+        Raises ValueError if no part of the video falls into the range.
+        """
+        self.video = source
+        self.subtitles = subtitles
+        output_file, output_ext = os.path.splitext(output)
+        sections = self.get_gop_sections(start, end)
+
+        # parts of an mp4 are written as .ts, only the joined clip is .mp4
+        if output_ext == '.mp4':
+            part_ext = '.ts'
+        else:
+            part_ext = output_ext
+        start_part = output + '.start' + part_ext
+        middle_part = output + '.middle' + part_ext
+        end_part = output + '.end' + part_ext
+
+        segments = []
+        cmds = []
+
+        # prepare subtitles
+        if self.subtitles:
+            self.subtitles_output = output_file + '.srt'
+            cmd = self.ffmpeg + [
+                '-i', self.subtitles,
+                '-ss', '%f' % start, '-t', '%f' % (end-start),
+                self.subtitles_output
+            ]
+            cmds.append(cmd)
+
+        # NOTE(review): the middle part starts at sections['start'][2] while
+        # the first part ends at sections['start'][1]; confirm against
+        # ffmpeg's input seeking that no GOP is dropped in between.
+        # re-encode up to keyframe
+        if sections['start'][1] > sections['start'][0]:
+            cmd = self.encode(source, start_part,
+                              start=start,
+                              duration=sections['start'][1] - start)
+            cmds.append(cmd)
+            segments.append(start_part)
+        # cut out middle segment
+        if sections['end'][1] - sections['start'][2] > 0:
+            cmd = self.copy(source, middle_part,
+                            start=sections['start'][2],
+                            duration=sections['end'][1] - sections['start'][2])
+            cmds.append(cmd)
+            segments.append(middle_part)
+        # re-encode after last keyframe
+        if sections['end'][2] > sections['end'][1]:
+            cmd = self.encode(source, end_part,
+                              start=sections['end'][1],
+                              duration=end - sections['end'][1])
+            cmds.append(cmd)
+            segments.append(end_part)
+
+        if not segments:
+            raise ValueError('nothing to chop between %s and %s' % (start, end))
+        # Always join, even a single part: join() remuxes into the output
+        # container and adds the subtitles.  Copying a lone part instead
+        # left .ts data in an .mp4 clip and the cut .srt on disk.
+        cmd, files = self.join(segments, output)
+        cmds.append(cmd)
+        segments += files
+
+        try:
+            for cmd in cmds:
+                subprocess.call(cmd)
+        finally:
+            # a failed command may not have written its file
+            for segment in segments:
+                if os.path.exists(segment):
+                    os.unlink(segment)
+
+    def get_info(self):
+        """Return ``ox.avinfo`` of the source video, cached once non-empty."""
+        if self.info:
+            return self.info
+        self.info = ox.avinfo(self.video)
+        return self.info
+
+    def get_keyframes(self):
+        """Return the keyframe timestamps of the source video.
+
+        The list is loaded from ``video + '.keyframes'`` (JSON) if that file
+        exists; otherwise it is read with ffprobe and written there.  The
+        duration of the video is appended as the final entry.
+        """
+        video = self.video
+        if self.keyframes:
+            return self.keyframes
+
+        keyframes_cache = video + '.keyframes'
+        if os.path.exists(keyframes_cache):
+            with open(keyframes_cache, 'r') as fd:
+                self.keyframes = json.load(fd)
+                return self.keyframes
+
+        cmd = [
+            'ffprobe',
+            '-v', 'error',
+            '-show_packets', '-select_streams', 'v',
+            '-show_entries', 'packet=pts_time,flags',
+            '-of', 'csv',
+            '-i', video
+        ]
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+        stdout = p.communicate()[0]
+        result = stdout.decode().strip()
+        keyframe_times = []
+        timecode = 0
+        for line in result.split('\n'):
+            fields = line.split(',')
+            if len(fields) < 2:
+                # empty ffprobe output or a stray line: nothing to parse
+                continue
+            # packets without a timestamp reuse the previous one
+            if fields[1] != 'N/A':
+                timecode = fields[1]
+            if ',K' in line:
+                keyframe_times.append(float(timecode))
+
+        last_keyframe = self.get_info()['duration']
+        if not keyframe_times or keyframe_times[-1] != last_keyframe:
+            keyframe_times.append(last_keyframe)
+
+        self.keyframes = keyframe_times
+        if not os.path.exists(keyframes_cache):
+            with open(keyframes_cache, 'w') as fd:
+                json.dump(keyframe_times, fd)
+        return keyframe_times
+
+    def get_gop_sections(self, start: float, end: float) -> dict:
+        """Return the keyframes around ``start`` and ``end``.
+
+        The result maps 'start' and 'end' to tuples of three neighbouring
+        entries of ``get_keyframes()``; index 1 and 2 are what ``__init__``
+        uses as part boundaries.
+        """
+        keyframes = self.get_keyframes()
+        last = len(keyframes) - 1
+        # bisect_left returns len(keyframes) for values past the last
+        # entry; clamp so the lookups below stay inside the list
+        start_pos = min(bisect_left(keyframes, start), last)
+        end_pos = min(bisect_left(keyframes, end), last)
+        return {
+            'start': (
+                keyframes[start_pos - 1] if start_pos > 0 else keyframes[start_pos],
+                keyframes[start_pos],
+                # a start inside the last GOP has no following entry
+                keyframes[min(start_pos + 1, last)]
+            ),
+            'end': (
+                keyframes[end_pos - 2] if end_pos != last else keyframes[end_pos - 1],
+                keyframes[end_pos - 1] if end_pos != last else keyframes[end_pos],
+                keyframes[end_pos]
+            )
+        }
+
+    def encode(self, source, target, start, duration):
+        """Return the ffmpeg command that re-encodes ``duration`` seconds of
+        ``source``, starting at ``start``, into ``target``.
+
+        The codec names come from the first audio stream (if any) and the
+        first video stream in ``get_info()``.
+        """
+        info = self.get_info()
+        if info['audio']:
+            acodec = ['-c:a', info['audio'][0]['codec']]
+        else:
+            acodec = []
+        vcodec = ['-c:v', info['video'][0]['codec']]
+
+        cmd = self.ffmpeg + [
+            '-ss', '%f' % start,
+            '-i', source,
+            '-t', '%f' % duration,
+            '-reset_timestamps', '1',
+        ] + acodec + vcodec + [
+            target
+        ]
+        return cmd
+
+    def copy(self, source, target, start, duration):
+        """Return the ffmpeg command that stream-copies ``duration`` seconds
+        of ``source``, starting at ``start``, into ``target``."""
+        cmd = self.ffmpeg + [
+            '-ss', '%f' % start,
+            '-i', source,
+            '-t', '%f' % duration,
+            '-c:v', 'copy', '-c:a', 'copy',
+            '-reset_timestamps', '1',
+            target
+        ]
+        return cmd
+
+    def join(self, segments, target):
+        """Write the concat list for ``segments`` and return ``(cmd, files)``.
+
+        ``cmd`` joins the segments (and the cut subtitles, if any) into
+        ``target``; ``files`` are the temporary files the caller has to
+        remove after running it.
+        """
+        file_list = target + '.txt'
+        with open(file_list, 'w') as fd:
+            for segment in segments:
+                # quoted so paths with spaces or quotes survive the parser
+                fd.write("file '%s'\n" % segment.replace("'", "'\\''"))
+        if self.subtitles:
+            subtitles = [
+                '-i', self.subtitles_output,
+                '-c:s', 'mov_text'
+            ]
+        else:
+            subtitles = []
+        cmd = self.ffmpeg + [
+            '-f', 'concat', '-safe', '0', '-i', file_list,
+        ] + subtitles + [
+            '-c:v', 'copy', '-c:a', 'copy',
+            '-reset_timestamps', '1',
+            target
+        ]
+        files = [file_list]
+        if self.subtitles:
+            files.append(self.subtitles_output)
+        return cmd, files
+
diff --git a/pandora/archive/extract.py b/pandora/archive/extract.py
index 448e52e39..413118126 100644
--- a/pandora/archive/extract.py
+++ b/pandora/archive/extract.py
@@ -602,27 +602,39 @@ def timeline_strip(item, cuts, info, prefix):
             timeline_image.save(timeline_file)
 
 
-def chop(video, start, end):
+def chop(video, start, end, subtitles=None):
     t = end - start
     tmp = tempfile.mkdtemp()
     ext = os.path.splitext(video)[1]
     choped_video = '%s/tmp%s' % (tmp, ext)
-    cmd = [
-        settings.FFMPEG,
-        '-y',
-        '-i', video,
-        '-ss', '%.3f' % start,
-        '-t', '%.3f' % t,
-        '-c:v', 'copy',
-        '-c:a', 'copy',
-        '-f', ext[1:],
-        choped_video
-    ]
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
-                         stdout=open('/dev/null', 'w'),
-                         stderr=open('/dev/null', 'w'),
-                         close_fds=True)
-    p.wait()
+    if subtitles and ext == '.mp4':
+        subtitles_f = choped_video + '.full.srt'
+        with open(subtitles_f, 'wb') as fd:
+            fd.write(subtitles)
+    else:
+        subtitles_f = None
+    if ext == '.mp4':
+        from .chop import Chop
+        Chop(video, choped_video, start, end, subtitles_f)
+        if subtitles_f:
+            os.unlink(subtitles_f)
+    else:
+        cmd = [
+            settings.FFMPEG,
+            '-y',
+            '-i', video,
+            '-ss', '%.3f' % start,
+            '-t', '%.3f' % t,
+            '-c:v', 'copy',
+            '-c:a', 'copy',
+            '-f', ext[1:],
+            choped_video
+        ]
+        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
+                             stdout=open('/dev/null', 'w'),
+                             stderr=open('/dev/null', 'w'),
+                             close_fds=True)
+        p.wait()
     f = open(choped_video, 'rb')
     os.unlink(choped_video)
     os.rmdir(tmp)
diff --git a/pandora/item/views.py b/pandora/item/views.py
index 9eeba4f10..966744eb8 100644
--- a/pandora/item/views.py
+++ b/pandora/item/views.py
@@ -1044,7 +1044,15 @@ def video(request, id, resolution, format, index=None, track=None):
 
         content_type = mimetypes.guess_type(path)[0]
         if len(t) == 2 and t[1] > t[0] and duration >= t[1]:
-            response = HttpResponse(extract.chop(path, t[0], t[1]), content_type=content_type)
+            # FIXME: could be multilingual here
+            subtitles = utils.get_by_key(settings.CONFIG['layers'], 'isSubtitles', True)
+            if subtitles:
+                srt = item.srt(subtitles['id'], encoder=ox.srt)
+                if len(srt) < 4:
+                    srt = None
+            else:
+                srt = None
+            response = HttpResponse(extract.chop(path, t[0], t[1], subtitles=srt), content_type=content_type)
             filename = u"Clip of %s - %s-%s - %s %s%s" % (
                 item.get('title'),
                 ox.format_duration(t[0] * 1000).replace(':', '.')[:-4],