use ffmpeg to decode audio/video instead of GStreamer

This commit is contained in:
j 2014-09-25 22:50:37 +02:00
parent 52b6baebc4
commit 8e3752cc11
7 changed files with 204 additions and 276 deletions

.bzrignore Normal file

@@ -0,0 +1,2 @@
oxtimelines.egg-info
dist

README

@@ -18,11 +18,6 @@ will be rendered. They can be used at a later point to render small 'keyframes'
 tiles without having to decode the video again.
 depends on
-gstreamer 0.10.30 or newer
 python-imaging
-gst-python
 python-ox
-on ubuntu 10.04 you need
-sudo add-apt-repository ppa:gstreamer-developers/ppa
+ffmpeg
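
A quick way to check that the new dependency resolves, using the same lookup
that oxtimelines/ffmpeg.py performs at import time (a sketch, assuming
python-ox is installed):

    import subprocess
    import ox

    FFMPEG = ox.file.cmd('ffmpeg')               # how ffmpeg.py locates the binary
    subprocess.check_call([FFMPEG, '-version'])  # raises if ffmpeg is missing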

bin/oxtimelines

@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2012
+# GPL 2008-2014
 import os
 import sys

oxtimelines/__init__.py

@@ -1,19 +1,14 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2010
+# GPL 2008-2014
 __version__ = 'bzr'
-import gobject
-gobject.threads_init()
 from glob import glob
 import math
 import os
 import time
-import pygst
-pygst.require("0.10")
-import gst
 import Image
 import timeline

oxtimelines/ffmpeg.py Normal file

@@ -0,0 +1,187 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2014
from __future__ import division, with_statement

import fractions
import subprocess

import Image
import numpy as np
import ox

FFMPEG = ox.file.cmd('ffmpeg')

FPS = 25

class Video(object):
    framerate = FPS
    samplerate = 48000

    def __init__(self, path, height, audio, video_callback, done_callback):
        self.height = height
        self.video = self.height > 0
        self.audio = audio
        self.video_callback = video_callback
        self.done_callback = done_callback
        self.path = path
        self.info = info = ox.avinfo(self.path)
        self.duration = info['duration']
        self.audio = self.audio and info['audio'] != []
        self.video = self.video and info['video'] != []
        if self.video:
            ratio = info['video'][0]['width'] / info['video'][0]['height']
            self.width = int(round(self.height * ratio))
            if self.width % 4:
                self.width += 4 - self.width % 4
        if self.audio:
            self.volume = []
            self.channels = 2

    def decode(self, points=None):
        if points:
            self.in_time = points[0]
            self.out_time = points[1]
        else:
            self.in_time = 0
            self.out_time = self.duration
        if self.video:
            timestamp = 0
            for frame in video(self.path, height=self.height, info=self.info, framerate=self.framerate):
                if self._is_between_in_and_out(timestamp):
                    self.video_callback(frame, timestamp)
                timestamp += 1/self.framerate
        if self.audio:
            timestamp = 0
            for frame in audio(self.path, info=self.info, samplerate=self.samplerate, framerate=self.framerate):
                if self._is_between_in_and_out(timestamp):
                    frame = rms(frame, 0) / self.samplerate
                    self.volume.append(frame)
                timestamp += 1/self.framerate
            #m = max(max(self.volume, key=lambda v: max(v)))
            #self.volume = [(v[0]/m, v[1]/m) for v in self.volume]
        self.done_callback(self.volume if self.audio else [])

    def get_duration(self):
        return self.duration

    def get_size(self):
        return (self.width, self.height)

    def _is_between_in_and_out(self, timestamp):
        return timestamp >= self.in_time and timestamp < self.out_time
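
The new Video class keeps the constructor signature and the
decode()/get_duration()/get_size() interface of the GStreamer pipeline it
replaces, but reports timestamps and durations in plain seconds rather than
nanoseconds. A minimal usage sketch (the file name and callback bodies are
made up):

    def video_callback(frame, timestamp):
        # frame is a PIL image, timestamp is in seconds
        frame.save('frame_%08.2f.png' % timestamp)

    def done_callback(volume):
        # volume is a list of per-frame stereo RMS values, or [] without audio
        print '%d volume samples' % len(volume)

    v = Video('clip.avi', height=64, audio=True,
              video_callback=video_callback, done_callback=done_callback)
    v.decode(points=(10, 20))  # only report frames between 10s and 20s

Unlike the old pipeline, which seeked close to the in point, decode() always
runs ffmpeg over the whole file and merely filters the callbacks by
timestamp.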
def video(path, height=96, info=None, framerate=FPS):
    depth = 3
    if not info:
        info = ox.avinfo(path)
    dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
    width = int(dar * height)
    width += width % 2
    nbytes = depth * width * height
    bufsize = nbytes + 100
    cmd = [
        FFMPEG,
        '-loglevel', 'error',
        '-i', path,
        '-threads', '4',
        '-f', 'image2pipe',
        '-pix_fmt', 'rgb24',
        '-vcodec', 'rawvideo',
        '-vf', 'scale=w=%d:h=%d' % (width, height),
        '-r', str(framerate),
        '-'
    ]
    #print ' '.join(cmd)
    p = subprocess.Popen(cmd,
        bufsize=bufsize,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    first = True
    while True:
        data = p.stdout.read(nbytes)
        if len(data) != nbytes:
            if first:
                raise IOError("ERROR: could not open file %s" % path)
            else:
                return
        else:
            first = False
            yield Image.fromstring('RGB', (width, height), data)
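
Each frame arrives on stdout as exactly depth * width * height bytes of raw
RGB, so the stream can be split into frames without any container parsing.
For example, at the default height=96 and a 16:9 source: width = int(16/9 *
96) = 170 (already even), so every read must return 3 * 170 * 96 = 48960
bytes; a short read means EOF, or, on the very first read, a file that
ffmpeg could not open.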
def audio(path, info=None, samplerate=48000, framerate=FPS):
    depth = 2
    channels = 2
    if not info:
        info = ox.avinfo(path)
    nbytes = depth * samplerate * channels
    bufsize = nbytes + 100
    #'-loglevel', 'error'
    cmd = [
        FFMPEG,
        '-i', path,
        '-vn',
        '-ar', str(samplerate),
        '-ac', str(channels),
        '-acodec', 'pcm_s16le',
        '-f', 'wav',
        '-'
    ]
    #print ' '.join(cmd)
    p = subprocess.Popen(cmd,
        bufsize=bufsize,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    chunk = int(nbytes / framerate)
    first = True
    while True:
        data = p.stdout.read(chunk)
        if len(data) != chunk:
            if first:
                raise IOError("ERROR: frame data has wrong size")
            else:
                return
        else:
            first = False
            audio = np.fromstring(data, dtype="int16")
            audio = audio.reshape((len(audio) // channels, channels)).astype(dtype='float')
            yield audio
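
Here the chunk size is one video frame's worth of audio: depth * samplerate
* channels / framerate = 2 * 48000 * 2 / 25 = 7680 bytes, i.e. 1920 stereo
samples per 1/25 s. Each yielded array has shape (1920, 2), which
Video.decode() reduces to a stereo volume pair via rms(frame, 0) /
samplerate, i.e. sqrt(mean(x**2)) per channel, scaled by 1/48000. Note that
the WAV muxer writes a small header at the start of the stream which is not
stripped here; for coarse per-frame volume levels that offset is presumably
acceptable.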
def rms(x, axis=None):
    return np.sqrt(np.mean(x**2, axis=axis))
class AspectRatio(fractions.Fraction):
    def __new__(cls, numerator, denominator=None):
        if not denominator:
            ratio = map(int, numerator.split(':'))
            if len(ratio) == 1:
                ratio.append(1)
            numerator = ratio[0]
            denominator = ratio[1]
            # if it's close enough to a common aspect ratio, use that instead
            if abs(numerator/denominator - 4/3) < 0.03:
                numerator = 4
                denominator = 3
            elif abs(numerator/denominator - 16/9) < 0.02:
                numerator = 16
                denominator = 9
        return super(AspectRatio, cls).__new__(cls, numerator, denominator)

    @property
    def ratio(self):
        return "%d:%d" % (self.numerator, self.denominator)

oxtimelines/imagesink.py

@@ -1,71 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008

import gobject
import pygst
pygst.require("0.10")
import gst
import Image

class ImageSink(gst.BaseSink):
    #def log(self, msg):
    #    print msg

    __gsignals__ = {
        "frame" : (gobject.SIGNAL_RUN_LAST,
            gobject.TYPE_NONE,
            ( gobject.TYPE_PYOBJECT, gobject.TYPE_UINT64 ))
    }

    __gsttemplates__ = (
        gst.PadTemplate ("sink",
            gst.PAD_SINK,
            gst.PAD_ALWAYS,
            gst.Caps("video/x-raw-rgb,"
                "bpp = (int) 24, depth = (int) 24,"
                "endianness = (int) BIG_ENDIAN,"
                "red_mask = (int) 0x00FF0000, "
                "green_mask = (int) 0x0000FF00, "
                "blue_mask = (int) 0x000000FF, "
                "width = (int) [ 1, max ], "
                "height = (int) [ 1, max ], "
                "framerate = (fraction) [ 0, max ]"))
    )

    def __init__(self, callback):
        gst.BaseSink.__init__(self)
        self.callback = callback
        self.width = 1
        self.height = 1
        self.set_sync(False)

    def do_set_caps(self, caps):
        self.log("caps %s" % caps.to_string())
        self.log("padcaps %s" % self.get_pad("sink").get_caps().to_string())
        self.width = caps[0]["width"]
        self.height = caps[0]["height"]
        self.framerate = caps[0]["framerate"]
        if not caps[0].get_name() == "video/x-raw-rgb":
            return False
        return True

    def do_render(self, buf):
        '''
        self.log("buffer %s %d" % (
            gst.TIME_ARGS(buf.timestamp), len(buf.data)
        ))
        '''
        frame_image = Image.fromstring('RGB', (self.width, self.height), buf.data)
        # self.emit('frame', frame, buf.timestamp)
        self.callback(frame_image, buf.timestamp)
        return gst.FLOW_OK

    def do_preroll(self, buf):
        return self.do_render(buf)

gobject.type_register(ImageSink)
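
What this sink did, negotiating 24-bit RGB caps and handing each buffer to a
callback as a PIL image via Image.fromstring(), is now covered by the
video() generator in oxtimelines/ffmpeg.py, which gets the same RGB frames
from ffmpeg's rawvideo/image2pipe output.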

oxtimelines/timeline.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2012
+# GPL 2008-2014
 from __future__ import division, with_statement
@@ -8,196 +8,12 @@ from glob import glob
 import Image
 import math
 import os
-from time import time, strftime
-import gobject
-import gst
-from imagesink import ImageSink
+from time import time
 import ox
+from .ffmpeg import Video
 
 FPS = 25
 
-class Video(gst.Pipeline):
-    def __init__(self, uri, height, audio, video_callback, done_callback):
-        gst.Pipeline.__init__(self)
-        self.duration = -1
-        self.height = height
-        self.video = self.height > 0
-        self.audio = audio
-        self.video_callback = video_callback
-        self.done_callback = done_callback
-        self.framerate = gst.Fraction(FPS, 1)
-        self.ready = False
-        self.src = gst.element_factory_make('filesrc')
-        self.src.props.location = uri
-        self.sbin = gst.element_factory_make('decodebin2')
-        #self.sbin.props.caps = gst.Caps('video/x-raw-yuv;video/x-raw-rgb')
-        self.sbin.props.expose_all_streams = True
-        self.add(self.src, self.sbin)
-        info = ox.avinfo(uri)
-        self.audio = self.audio and info['audio'] != []
-        self.video = self.video and info['video'] != []
-        if self.video:
-            ratio = info['video'][0]['width'] / info['video'][0]['height']
-            self.width = int(round(self.height * ratio))
-            if self.width % 4:
-                self.width += 4 - self.width % 4
-            self.vqueue = gst.element_factory_make('queue')
-            self.scale = gst.element_factory_make('videoscale')
-            self.rate = gst.element_factory_make('videorate')
-            self.csp = gst.element_factory_make('ffmpegcolorspace')
-            self.vsink = ImageSink(self._video_callback)
-            #self.vsink.connect('frame', self._video_callback)
-            self.add(
-                self.vqueue, self.scale, self.rate, self.csp, self.vsink
-            )
-        if self.audio:
-            self.volume = []
-            self.channels = 2
-            self.aqueue = gst.element_factory_make('queue')
-            self.convert = gst.element_factory_make('audioconvert')
-            self.resample = gst.element_factory_make('audioresample')
-            self.level = gst.element_factory_make('level')
-            # * 0.1 makes the interval about 23 msec, as opposed to about 46 msec otherwise
-            self.level.props.interval = int(gst.SECOND / float(self.framerate) * 0.1)
-            self.asink = gst.element_factory_make('fakesink')
-            self.add(
-                self.aqueue, self.convert, self.resample, self.level, self.asink
-            )
-            self.nanoseconds_per_frame = FPS / 1e9
-        self.src.link(self.sbin)
-        self.sbin.connect('pad-added', self._pad_added_callback)
-        self.set_state(gst.STATE_PAUSED)
-        self.get_state()
-        self.get_duration()
-        #self.frames = int(float(self.duration) * float(self.framerate) / gst.SECOND)
-        bus = self.get_bus()
-        bus.add_signal_watch()
-        self.watch_id = bus.connect('message', self._bus_message_callback)
-        self.mainloop = gobject.MainLoop()
-
-    def _pad_added_callback(self, sbin, pad):
-        caps = pad.get_caps()
-        if self.height and 'video' in str(caps):
-            pad.link(self.vqueue.get_pad('sink'))
-            self.vqueue.link(self.scale)
-            self.scale.link(self.rate)
-            self.rate.link(self.csp, gst.Caps(
-                'video/x-raw-rgb;video/x-raw-yuv,framerate=%s/%s,width=%s,height=%s' % (
-                    self.framerate.num, self.framerate.denom, self.width, self.height
-                )
-            ))
-            self.csp.link(self.vsink)
-        if self.audio and 'audio' in str(caps):
-            self.samplerate = caps[0]['rate']
-            if not isinstance(self.samplerate, int):
-                self.samplerate = self.samplerate.high
-            pad.link(self.aqueue.get_pad('sink'))
-            self.aqueue.link(self.convert)
-            self.convert.link(self.resample, gst.Caps(
-                'audio/x-raw-int,channels=%s,width=16,depth=16' % self.channels
-            ))
-            self.resample.link(self.level, gst.Caps(
-                'audio/x-raw-int,rate=%s,channels=%s,width=16,depth=16' % (
-                    self.samplerate, self.channels
-                )
-            ))
-            self.level.link(self.asink)
-
-    def _video_callback(self, frame_image, timestamp):
-        if not self.ready:
-            self.ready = True
-        else:
-            if self._is_between_in_and_out(timestamp, 'video'):
-                self.video_callback(frame_image, timestamp)
-
-    def _bus_message_callback(self, bus, message):
-        if self.audio and message.src == self.level:
-            struct = message.structure
-            if struct.get_name() == 'level':
-                timestamp = struct['timestamp']
-                if self._is_between_in_and_out(timestamp, 'audio'):
-                    sample_i = timestamp * self.nanoseconds_per_frame
-                    if sample_i > len(self.volume):
-                        self.volume.append((
-                            pow(10, struct['rms'][0] / 20),
-                            pow(10, struct['rms'][1] / 20)
-                        ))
-        elif message.src == self and message.type == gst.MESSAGE_EOS:
-            self._quit()
-
-    def _is_between_in_and_out(self, timestamp, av):
-        try:
-            if timestamp < self.in_time:
-                return False
-            if timestamp >= self.out_time:
-                self.done[av] = True
-                if self.done['video'] and self.done['audio']:
-                    self._quit()
-                    # gobject.idle_add(self._done)
-                return False
-            return True
-        except:
-            # weirdness:
-            # the first two times audio calls this, the timestamp is
-            # 23000000. the second time, self.in_time does not exist.
-            return False
-
-    def _quit(self):
-        if self.is_running:
-            self.is_running = False
-            self.mainloop.quit()
-
-    def decode(self, points=None):
-        if points:
-            self.in_time = points[0] * 1e9
-            self.out_time = points[1] * 1e9
-            if self.in_time > 5 * 1e9:
-                self.seek(
-                    1.0, gst.FORMAT_TIME,
-                    gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
-                    gst.SEEK_TYPE_SET, self.in_time - 5 * 1e9,
-                    gst.SEEK_TYPE_NONE, -1
-                )
-        else:
-            self.in_time = 0
-            self.out_time = self.duration + 1
-        self.done = {'video': not self.video, 'audio': not self.audio}
-        self.set_state(gst.STATE_PLAYING)
-        self.is_running = True
-        self.mainloop.run()
-        self.done_callback(self.volume if self.audio else [])
-
-    def get_duration(self):
-        if self.duration < 0:
-            if self.video:
-                pads = self.vsink.sink_pads()
-            else:
-                pads = self.asink.sink_pads()
-            q = gst.query_new_duration(gst.FORMAT_TIME)
-            for pad in pads:
-                if pad.get_peer() and pad.get_peer().query(q):
-                    format, self.duration = q.parse_duration()
-        return self.duration
-
-    def get_size(self):
-        return (self.width, self.height)
-
 class Timelines():
     def __init__(
@@ -233,6 +49,8 @@ class Timelines():
             self.cut_frames = []
             self.max_cut_len = 15000 # 10 minutes
             self.max_distance = 64 * math.sqrt(3 * pow(255, 2))
+        else:
+            self.cuts = []
         self.full_tile_w = 1920
         self.large_tile_w = 1500
@@ -253,7 +71,7 @@
         self.log = log
         if log:
            self.profiler = Profiler()
-            self.profiler.set_task('gst')
+            self.profiler.set_task('ffmpeg')
 
         ox.makedirs(self.tile_path)
@@ -274,7 +92,7 @@
             video_file, self.large_tile_h if self.render_video else 0,
             self.render_audio, self._video_callback, self._done_callback
         )
-        duration = video.get_duration() / 1e9
+        duration = video.get_duration()
         points = None
         if self.points:
             in_point = None
@@ -327,6 +145,7 @@
         self.frame_i = 0
         self.frame_offset = 0
+        self.log and self.profiler.set_task('ffmpeg')
         self.videos[0].decode(self.file_points[0])
 
         # remove tiles from previous run
@@ -346,8 +165,8 @@
         self.log and self.profiler.set_task('_video_callback()')
         '''
-        if timestamp != None and self.frame_i != int(round(timestamp / 1e9 * FPS)):
-            print 'WARNING: i is', self.frame_i, 'timestamp is', timestamp, '(', int(round(timestamp / 1e9 * FPS)), ')'
+        if timestamp != None and self.frame_i != int(round(timestamp * FPS)):
+            print 'WARNING: i is', self.frame_i, 'timestamp is', timestamp, '(', int(round(timestamp * FPS)), ')'
         '''
         self.is_last_frame = self.frame_i == self.frame_n - 1
         large_tile_x = self.frame_i % self.large_tile_w
@@ -441,7 +260,7 @@
             if self.render_antialias:
                 self._save_full_tile_image('antialias')
         self.frame_i += 1
-        self.log and self.profiler.set_task('gst')
+        self.log and self.profiler.set_task('ffmpeg')
 
     def _render_keyframes(self):
         self.log and self.profiler.set_task('_render_keyframes() # keyframes timelines')
@@ -656,6 +475,7 @@
         if self.video_i < self.video_n - 1:
             self.video_i += 1
             self.frame_offset = self.frame_i
+            self.log and self.profiler.set_task('ffmpeg')
             self.videos[self.video_i].decode(self.file_points[self.video_i])
         else:
             if self.render_video:
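
The remaining timeline.py changes are all of one kind: decoder timestamps
and durations are now plain seconds instead of nanoseconds (duration / 1e9
and timestamp / 1e9 * FPS become duration and timestamp * FPS), and the
profiler task previously labeled 'gst' is now 'ffmpeg'. A small sketch of
the frame-index invariant the commented-out warning checks, assuming 25 fps:

    from __future__ import division

    FPS = 25
    for frame_i in range(100):
        timestamp = frame_i / FPS   # seconds, as ffmpeg.Video now reports them
        assert int(round(timestamp * FPS)) == frame_i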