From 5355dbf82104f8c77a94a860311fa80da15eb3bb Mon Sep 17 00:00:00 2001
From: Will Thompson
Date: Fri, 11 Mar 2016 12:12:54 +0000
Subject: [PATCH 2/2] Add WebVTT output support

This subset of the format is almost identical to SRT, but I think it's
cleaner to have a separate module (at the cost of a little bit of
copy-pasta).
---
Review changes: cue text now has CRLF / bare CR collapsed to LF before
being converted to CRLF (previously CRLF input became CR CR LF), and the
output is assembled with a single join(). The blob id on the ox/vtt.py
index line below was not regenerated after these changes.

 ox/__init__.py |  1 +
 ox/vtt.py      | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 ox/vtt.py

diff --git a/ox/__init__.py b/ox/__init__.py
index 4ddaab8..98402fb 100644
--- a/ox/__init__.py
+++ b/ox/__init__.py
@@ -13,6 +13,7 @@ from . import jsonc
 from . import net
 from . import srt
 from . import utils
+from . import vtt
 
 from .api import *
 from .file import *
diff --git a/ox/vtt.py b/ox/vtt.py
new file mode 100644
index 0000000..6dd7070
--- /dev/null
+++ b/ox/vtt.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import codecs
+
+import ox
+
+
+def _webvtt_timecode(t):
+    """Formats a time in seconds as a WebVTT timestamp"""
+    # t is in seconds; it is scaled by 1000 before being handed to
+    # ox.format_duration
+    return ox.format_duration(t * 1000, years=False)
+
+
+def _cue_text(value):
+    """Strips a cue payload and normalises its line endings to CRLF"""
+    # Collapse CRLF and bare CR to LF first: replacing '\n' directly would
+    # turn an existing CRLF into CR CR LF, which WebVTT parsers read as a
+    # blank line, ending the cue early.
+    value = value.replace('\r\n', '\n').replace('\r', '\n')
+    return value.strip().replace('\n', '\r\n')
+
+
+def encode(data, webvtt=False):
+    """Encodes subtitles into WebVTT format
+
+    data: list of dicts with 'in', 'out': float and 'value': unicode
+    webvtt: currently unused
+
+    Returns: a UTF-8-encoded bytestring
+
+    >>> encode([{'in': 1.25, 'out': 60 * 60 + 1, 'value': u'touch\\u00E9'}])
+    '\\xef\\xbb\\xbfWEBVTT\\r\\n\\r\\n1\\r\\n00:00:01.250 --> 01:00:01.000\\r\\ntouch\\xc3\\xa9\\r\\n\\r\\n'
+    """
+    cues = [u'WEBVTT\r\n\r\n']
+
+    for i, s in enumerate(data, 1):
+        # Cues are collected and joined once at the end
+        cues.append('%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
+            i,
+            _webvtt_timecode(s['in']),
+            _webvtt_timecode(s['out']),
+            _cue_text(s['value'])
+        ))
+
+    return codecs.BOM_UTF8 + u''.join(cues).encode('utf-8')
-- 
2.5.0