From ac2e829016d124c3873fb8a6169f807af2e7ba9d Mon Sep 17 00:00:00 2001
From: j
Date: Wed, 8 Jun 2016 11:36:55 +0200
Subject: [PATCH] fix python3 ox.srt

---
 ox/srt.py | 52 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/ox/srt.py b/ox/srt.py
index 1b3f8b3..1eb912f 100644
--- a/ox/srt.py
+++ b/ox/srt.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import with_statement, division, print_function
-import chardet
-import re
 import codecs
+import re
+import chardet
+from six import PY2
 
 import ox
 
 
@@ -12,18 +13,21 @@ __all__ = []
 
 
 def _detect_encoding(fp):
-    bomDict={ # bytepattern : name
-              (0x00, 0x00, 0xFE, 0xFF): "utf_32_be",
-              (0xFF, 0xFE, 0x00, 0x00): "utf_32_le",
-              (0xFE, 0xFF, None, None): "utf_16_be",
-              (0xFF, 0xFE, None, None): "utf_16_le",
-              (0xEF, 0xBB, 0xBF, None): "utf_8",
-            }
+    bomDict = { # bytepattern : name
+        (0x00, 0x00, 0xFE, 0xFF): "utf_32_be",
+        (0xFF, 0xFE, 0x00, 0x00): "utf_32_le",
+        (0xFE, 0xFF, None, None): "utf_16_be",
+        (0xFF, 0xFE, None, None): "utf_16_le",
+        (0xEF, 0xBB, 0xBF, None): "utf_8",
+    }
 
     # go to beginning of file and get the first 4 bytes
     oldFP = fp.tell()
     fp.seek(0)
-    (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4)))
+    if PY2:
+        (byte1, byte2, byte3, byte4) = [ord(b) for b in fp.read(4)]
+    else:
+        (byte1, byte2, byte3, byte4) = fp.read(4)
 
     # try bom detection using 4 bytes, 3 bytes, or 2 bytes
     bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
@@ -31,18 +35,18 @@ def _detect_encoding(fp):
         bomDetection = bomDict.get((byte1, byte2, byte3, None))
         if not bomDetection:
             bomDetection = bomDict.get((byte1, byte2, None, None))
-    ## if BOM detected, we're done :-)
+    # if BOM detected, we're done :-)
     fp.seek(oldFP)
     if bomDetection:
         return bomDetection
     encoding = 'latin-1'
-    #more character detecting magick using http://chardet.feedparser.org/
+    # more character detecting magick using http://chardet.feedparser.org/
     fp.seek(0)
     rawdata = fp.read()
-    #if data can be decoded as utf-8 use that, try chardet otherwise
-    #chardet detects utf-8 as ISO-8859-2 most of the time
+    # if data can be decoded as utf-8 use that, try chardet otherwise
+    # chardet detects utf-8 as ISO-8859-2 most of the time
     try:
-        data = unicode(rawdata, 'utf-8')
+        rawdata.decode('utf-8')
         encoding = 'utf-8'
     except:
         encoding = chardet.detect(rawdata)['encoding']
@@ -63,26 +67,28 @@ def load(filename, offset=0):
     def parse_time(t):
         return offset + ox.time2ms(t.replace(',', '.')) / 1000
 
-    with open(filename) as f:
+    with open(filename, 'rb') as f:
         encoding = _detect_encoding(f)
         data = f.read()
     try:
-        data = unicode(data, encoding)
+        data = data.decode(encoding)
     except:
         try:
-            data = unicode(data, 'latin-1')
+            data = data.decode('latin-1')
         except:
             print("failed to detect encoding, giving up")
             return srt
 
     data = data.replace('\r\n', '\n')
-    srts = re.compile('(\d\d:\d\d:\d\d[,.]\d\d\d)\s*?-->\s*?(\d\d:\d\d:\d\d[,.]\d\d\d).*?\n(.*?)\n\n', re.DOTALL)
+    regexp = r'(\d\d:\d\d:\d\d[,.]\d\d\d)\s*?-->\s*?(\d\d:\d\d:\d\d[,.]\d\d\d).*?\n(.*?)\n\n'
+    srts = re.compile(regexp, re.DOTALL)
     i = 0
     for s in srts.findall(data):
-        _s = {'id': str(i),
-              'in': parse_time(s[0]),
-              'out': parse_time(s[1]),
-              'value': s[2].strip()
+        _s = {
+            'id': str(i),
+            'in': parse_time(s[0]),
+            'out': parse_time(s[1]),
+            'value': s[2].strip()
         }
         srt.append(_s)
         i += 1
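
Note on the new PY2 branch in _detect_encoding(): reading from a file opened in
binary mode returns str on Python 2 (each element is a one-character string and
needs ord()), but bytes on Python 3 (unpacking already yields ints). A standalone
sketch of that difference, separate from the patch itself; io.BytesIO stands in
for the subtitle file here:

    # Reading 4 bytes from a binary stream:
    #   Python 2: read() returns str, elements are 1-char strings -> need ord()
    #   Python 3: read() returns bytes, unpacking yields ints directly
    import io

    from six import PY2

    buf = io.BytesIO(b'\xef\xbb\xbfhello')  # UTF-8 BOM followed by some text

    if PY2:
        (byte1, byte2, byte3, byte4) = [ord(b) for b in buf.read(4)]
    else:
        (byte1, byte2, byte3, byte4) = buf.read(4)

    print((byte1, byte2, byte3))  # (239, 187, 191), the UTF-8 BOM, on either interpreter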
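
For reference, a minimal usage sketch of the patched loader, assuming a python-ox
checkout with this patch applied; the file name and the single cue below are made
up for illustration:

    # Write a tiny UTF-8 srt file and parse it with ox.srt.load().
    import ox.srt

    sample = (
        u'1\n'
        u'00:00:01,000 --> 00:00:03,500\n'
        u'Hello, world\n'
        u'\n'
    )
    with open('example.srt', 'wb') as f:
        f.write(sample.encode('utf-8'))

    subs = ox.srt.load('example.srt')
    # load() returns a list of dicts; 'in'/'out' are seconds, 'value' is the text,
    # e.g. [{'id': '0', 'in': 1.0, 'out': 3.5, 'value': 'Hello, world'}]
    print(subs)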