use ffmpeg to decode audio/video instead of GStreamer

j 2014-09-25 22:50:37 +02:00
parent 52b6baebc4
commit 8e3752cc11
7 changed files with 204 additions and 276 deletions
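At its core, the change replaces an in-process GStreamer pipeline with an ffmpeg subprocess that writes raw frames to a pipe. A minimal standalone sketch of that technique (it assumes an ffmpeg binary on $PATH; 'clip.avi', the 128x96 size and 25 fps are illustrative):

import subprocess

width, height, fps = 128, 96, 25
nbytes = width * height * 3  # one rgb24 frame on the pipe
p = subprocess.Popen([
    'ffmpeg', '-loglevel', 'error', '-i', 'clip.avi',
    '-f', 'image2pipe', '-vcodec', 'rawvideo', '-pix_fmt', 'rgb24',
    '-vf', 'scale=w=%d:h=%d' % (width, height), '-r', str(fps), '-',
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
while True:
    data = p.stdout.read(nbytes)
    if len(data) != nbytes:
        break  # end of stream (or decode error)
    # 'data' now holds exactly one decoded frame, fps frames per second of video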

.bzrignore Normal file

@@ -0,0 +1,2 @@
oxtimelines.egg-info
dist

README

@@ -18,11 +18,6 @@ will be rendered. They can be used at a later point to render small 'keyframes'
 tiles without having to decode the video again.
 depends on
-gstreamer 0.10.30 or newer
 python-imaging
-gst-python
 python-ox
+ffmpeg
-on ubuntu 10.04 you need
-sudo add-apt-repository ppa:gstreamer-developers/ppa


@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2012
+# GPL 2008-2014
 import os
 import sys


@@ -1,19 +1,14 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2010
+# GPL 2008-2014
 __version__ = 'bzr'
-import gobject
-gobject.threads_init()
 from glob import glob
 import math
 import os
 import time
-import pygst
-pygst.require("0.10")
-import gst
 import Image
 import timeline

oxtimelines/ffmpeg.py Normal file

@@ -0,0 +1,187 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2014
from __future__ import division, with_statement

import fractions
import subprocess

import Image
import numpy as np
import ox

FFMPEG = ox.file.cmd('ffmpeg')
FPS = 25


class Video(object):

    framerate = FPS
    samplerate = 48000

    def __init__(self, path, height, audio, video_callback, done_callback):
        self.height = height
        self.video = self.height > 0
        self.audio = audio
        self.video_callback = video_callback
        self.done_callback = done_callback
        self.path = path
        self.info = info = ox.avinfo(self.path)
        self.duration = info['duration']
        self.audio = self.audio and info['audio'] != []
        self.video = self.video and info['video'] != []
        if self.video:
            ratio = info['video'][0]['width'] / info['video'][0]['height']
            self.width = int(round(self.height * ratio))
            if self.width % 4:
                self.width += 4 - self.width % 4
        if self.audio:
            self.volume = []
            self.channels = 2

    def decode(self, points=None):
        if points:
            self.in_time = points[0]
            self.out_time = points[1]
        else:
            self.in_time = 0
            self.out_time = self.duration
        if self.video:
            timestamp = 0
            for frame in video(self.path, height=self.height, info=self.info, framerate=self.framerate):
                if self._is_between_in_and_out(timestamp):
                    self.video_callback(frame, timestamp)
                timestamp += 1 / self.framerate
        if self.audio:
            timestamp = 0
            for frame in audio(self.path, info=self.info, samplerate=self.samplerate, framerate=self.framerate):
                if self._is_between_in_and_out(timestamp):
                    frame = rms(frame, 0) / self.samplerate
                    self.volume.append(frame)
                timestamp += 1 / self.framerate
            #m = max(max(self.volume, key=lambda v: max(v)))
            #self.volume = [(v[0]/m, v[1]/m) for v in self.volume]
        self.done_callback(self.volume if self.audio else [])

    def get_duration(self):
        return self.duration

    def get_size(self):
        return (self.width, self.height)

    def _is_between_in_and_out(self, timestamp):
        return timestamp >= self.in_time and timestamp < self.out_time


def video(path, height=96, info=None, framerate=FPS):
    depth = 3
    if not info:
        info = ox.avinfo(path)
    dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
    width = int(dar * height)
    width += width % 2
    # one raw rgb24 frame on the pipe is width * height * 3 bytes
    nbytes = depth * width * height
    bufsize = nbytes + 100
    cmd = [
        FFMPEG,
        '-loglevel', 'error',
        '-i', path,
        '-threads', '4',
        '-f', 'image2pipe',
        '-pix_fmt', 'rgb24',
        '-vcodec', 'rawvideo',
        '-vf', 'scale=w=%d:h=%d' % (width, height),
        '-r', str(framerate),
        '-'
    ]
    #print ' '.join(cmd)
    p = subprocess.Popen(cmd,
        bufsize=bufsize,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    first = True
    while True:
        data = p.stdout.read(nbytes)
        if len(data) != nbytes:
            if first:
                raise IOError("ERROR: could not open file %s" % path)
            else:
                return
        else:
            first = False
            yield Image.fromstring('RGB', (width, height), data)


def audio(path, info=None, samplerate=48000, framerate=FPS):
    depth = 2
    channels = 2
    if not info:
        info = ox.avinfo(path)
    nbytes = depth * samplerate * channels
    bufsize = nbytes + 100
    #'-loglevel', 'error'
    cmd = [
        FFMPEG,
        '-i', path,
        '-vn',
        '-ar', str(samplerate),
        '-ac', str(channels),
        '-acodec', 'pcm_s16le',
        '-f', 'wav',
        '-'
    ]
    #print ' '.join(cmd)
    p = subprocess.Popen(cmd,
        bufsize=bufsize,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # one video frame's worth of samples per chunk
    chunk = int(nbytes / framerate)
    first = True
    while True:
        data = p.stdout.read(chunk)
        if len(data) != chunk:
            if first:
                raise IOError("ERROR: frame data has wrong size")
            else:
                return
        else:
            first = False
            audio = np.fromstring(data, dtype="int16")
            # integer division keeps the sample count an int under true division
            audio = audio.reshape((len(audio) // channels, channels)).astype(dtype='float')
            yield audio


def rms(x, axis=None):
    return np.sqrt(np.mean(x**2, axis=axis))


class AspectRatio(fractions.Fraction):

    def __new__(cls, numerator, denominator=None):
        if not denominator:
            ratio = map(int, numerator.split(':'))
            if len(ratio) == 1:
                ratio.append(1)
            numerator = ratio[0]
            denominator = ratio[1]
            # if it's close enough to a common aspect ratio, use that instead
            if abs(numerator / denominator - 4/3) < 0.03:
                numerator = 4
                denominator = 3
            elif abs(numerator / denominator - 16/9) < 0.02:
                numerator = 16
                denominator = 9
        return super(AspectRatio, cls).__new__(cls, numerator, denominator)

    @property
    def ratio(self):
        return "%d:%d" % (self.numerator, self.denominator)

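Two details of the new module are easy to miss: audio is read in chunks holding exactly one video frame's worth of samples, and display aspect ratios snap to the common 4:3 and 16:9 values when they are within a small tolerance. A quick demonstration, assuming the module imports as oxtimelines.ffmpeg:

from oxtimelines.ffmpeg import AspectRatio

# chunk size: 2 bytes * 48000 Hz * 2 channels / 25 fps = 7680 bytes,
# i.e. 1920 stereo samples per yielded array
assert 2 * 48000 * 2 // 25 == 7680

# 853:480 is within 0.02 of 16/9, so it snaps to exactly 16:9
assert AspectRatio('853:480').ratio == '16:9'
# AspectRatio is still a Fraction, so tile widths come out exact
assert int(AspectRatio('4:3') * 96) == 128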
oxtimelines/imagesink.py

@@ -1,71 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
import gobject
import pygst
pygst.require("0.10")
import gst
import Image


class ImageSink(gst.BaseSink):

    #def log(self, msg):
    #    print msg

    __gsignals__ = {
        "frame" : (gobject.SIGNAL_RUN_LAST,
            gobject.TYPE_NONE,
            ( gobject.TYPE_PYOBJECT, gobject.TYPE_UINT64 ))
    }

    __gsttemplates__ = (
        gst.PadTemplate("sink",
            gst.PAD_SINK,
            gst.PAD_ALWAYS,
            gst.Caps("video/x-raw-rgb,"
                "bpp = (int) 24, depth = (int) 24,"
                "endianness = (int) BIG_ENDIAN,"
                "red_mask = (int) 0x00FF0000, "
                "green_mask = (int) 0x0000FF00, "
                "blue_mask = (int) 0x000000FF, "
                "width = (int) [ 1, max ], "
                "height = (int) [ 1, max ], "
                "framerate = (fraction) [ 0, max ]"))
    )

    def __init__(self, callback):
        gst.BaseSink.__init__(self)
        self.callback = callback
        self.width = 1
        self.height = 1
        self.set_sync(False)

    def do_set_caps(self, caps):
        self.log("caps %s" % caps.to_string())
        self.log("padcaps %s" % self.get_pad("sink").get_caps().to_string())
        self.width = caps[0]["width"]
        self.height = caps[0]["height"]
        self.framerate = caps[0]["framerate"]
        if not caps[0].get_name() == "video/x-raw-rgb":
            return False
        return True

    def do_render(self, buf):
        '''
        self.log("buffer %s %d" % (
            gst.TIME_ARGS(buf.timestamp), len(buf.data)
        ))
        '''
        frame_image = Image.fromstring('RGB', (self.width, self.height), buf.data)
        # self.emit('frame', frame, buf.timestamp)
        self.callback(frame_image, buf.timestamp)
        return gst.FLOW_OK

    def do_preroll(self, buf):
        return self.do_render(buf)

gobject.type_register(ImageSink)

oxtimelines/timeline.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-# GPL 2008-2012
+# GPL 2008-2014
 from __future__ import division, with_statement
@@ -8,196 +8,12 @@ from glob import glob
 import Image
 import math
 import os
-from time import time, strftime
+from time import time
-import gobject
-import gst
-from imagesink import ImageSink
 import ox
+from .ffmpeg import Video

 FPS = 25
-class Video(gst.Pipeline):
-
-    def __init__(self, uri, height, audio, video_callback, done_callback):
-        gst.Pipeline.__init__(self)
-        self.duration = -1
-        self.height = height
-        self.video = self.height > 0
-        self.audio = audio
-        self.video_callback = video_callback
-        self.done_callback = done_callback
-        self.framerate = gst.Fraction(FPS, 1)
-        self.ready = False
-        self.src = gst.element_factory_make('filesrc')
-        self.src.props.location = uri
-        self.sbin = gst.element_factory_make('decodebin2')
-        #self.sbin.props.caps = gst.Caps('video/x-raw-yuv;video/x-raw-rgb')
-        self.sbin.props.expose_all_streams = True
-        self.add(self.src, self.sbin)
-        info = ox.avinfo(uri)
-        self.audio = self.audio and info['audio'] != []
-        self.video = self.video and info['video'] != []
-        if self.video:
-            ratio = info['video'][0]['width'] / info['video'][0]['height']
-            self.width = int(round(self.height * ratio))
-            if self.width % 4:
-                self.width += 4 - self.width % 4
-            self.vqueue = gst.element_factory_make('queue')
-            self.scale = gst.element_factory_make('videoscale')
-            self.rate = gst.element_factory_make('videorate')
-            self.csp = gst.element_factory_make('ffmpegcolorspace')
-            self.vsink = ImageSink(self._video_callback)
-            #self.vsink.connect('frame', self._video_callback)
-            self.add(
-                self.vqueue, self.scale, self.rate, self.csp, self.vsink
-            )
-        if self.audio:
-            self.volume = []
-            self.channels = 2
-            self.aqueue = gst.element_factory_make('queue')
-            self.convert = gst.element_factory_make('audioconvert')
-            self.resample = gst.element_factory_make('audioresample')
-            self.level = gst.element_factory_make('level')
-            # * 0.1 makes the interval about 23 msec, as opposed to about 46 msec otherwise
-            self.level.props.interval = int(gst.SECOND / float(self.framerate) * 0.1)
-            self.asink = gst.element_factory_make('fakesink')
-            self.add(
-                self.aqueue, self.convert, self.resample, self.level, self.asink
-            )
-        self.nanoseconds_per_frame = FPS / 1e9
-        self.src.link(self.sbin)
-        self.sbin.connect('pad-added', self._pad_added_callback)
-        self.set_state(gst.STATE_PAUSED)
-        self.get_state()
-        self.get_duration()
-        #self.frames = int(float(self.duration) * float(self.framerate) / gst.SECOND)
-        bus = self.get_bus()
-        bus.add_signal_watch()
-        self.watch_id = bus.connect('message', self._bus_message_callback)
-        self.mainloop = gobject.MainLoop()
-
-    def _pad_added_callback(self, sbin, pad):
-        caps = pad.get_caps()
-        if self.height and 'video' in str(caps):
-            pad.link(self.vqueue.get_pad('sink'))
-            self.vqueue.link(self.scale)
-            self.scale.link(self.rate)
-            self.rate.link(self.csp, gst.Caps(
-                'video/x-raw-rgb;video/x-raw-yuv,framerate=%s/%s,width=%s,height=%s' % (
-                    self.framerate.num, self.framerate.denom, self.width, self.height
-                )
-            ))
-            self.csp.link(self.vsink)
-        if self.audio and 'audio' in str(caps):
-            self.samplerate = caps[0]['rate']
-            if not isinstance(self.samplerate, int):
-                self.samplerate = self.samplerate.high
-            pad.link(self.aqueue.get_pad('sink'))
-            self.aqueue.link(self.convert)
-            self.convert.link(self.resample, gst.Caps(
-                'audio/x-raw-int,channels=%s,width=16,depth=16' % self.channels
-            ))
-            self.resample.link(self.level, gst.Caps(
-                'audio/x-raw-int,rate=%s,channels=%s,width=16,depth=16' % (
-                    self.samplerate, self.channels
-                )
-            ))
-            self.level.link(self.asink)
-
-    def _video_callback(self, frame_image, timestamp):
-        if not self.ready:
-            self.ready = True
-        else:
-            if self._is_between_in_and_out(timestamp, 'video'):
-                self.video_callback(frame_image, timestamp)
-
-    def _bus_message_callback(self, bus, message):
-        if self.audio and message.src == self.level:
-            struct = message.structure
-            if struct.get_name() == 'level':
-                timestamp = struct['timestamp']
-                if self._is_between_in_and_out(timestamp, 'audio'):
-                    sample_i = timestamp * self.nanoseconds_per_frame
-                    if sample_i > len(self.volume):
-                        self.volume.append((
-                            pow(10, struct['rms'][0] / 20),
-                            pow(10, struct['rms'][1] / 20)
-                        ))
-        elif message.src == self and message.type == gst.MESSAGE_EOS:
-            self._quit()
-
-    def _is_between_in_and_out(self, timestamp, av):
-        try:
-            if timestamp < self.in_time:
-                return False
-            if timestamp >= self.out_time:
-                self.done[av] = True
-                if self.done['video'] and self.done['audio']:
-                    self._quit()
-                    # gobject.idle_add(self._done)
-                return False
-            return True
-        except:
-            # weirdness:
-            # the first two times audio calls this, the timestamp is
-            # 23000000. the second time, self.in_time does not exist.
-            return False
-
-    def _quit(self):
-        if self.is_running:
-            self.is_running = False
-            self.mainloop.quit()
-
-    def decode(self, points=None):
-        if points:
-            self.in_time = points[0] * 1e9
-            self.out_time = points[1] * 1e9
-            if self.in_time > 5 * 1e9:
-                self.seek(
-                    1.0, gst.FORMAT_TIME,
-                    gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
-                    gst.SEEK_TYPE_SET, self.in_time - 5 * 1e9,
-                    gst.SEEK_TYPE_NONE, -1
-                )
-        else:
-            self.in_time = 0
-            self.out_time = self.duration + 1
-        self.done = {'video': not self.video, 'audio': not self.audio}
-        self.set_state(gst.STATE_PLAYING)
-        self.is_running = True
-        self.mainloop.run()
-        self.done_callback(self.volume if self.audio else [])
-
-    def get_duration(self):
-        if self.duration < 0:
-            if self.video:
-                pads = self.vsink.sink_pads()
-            else:
-                pads = self.asink.sink_pads()
-            q = gst.query_new_duration(gst.FORMAT_TIME)
-            for pad in pads:
-                if pad.get_peer() and pad.get_peer().query(q):
-                    format, self.duration = q.parse_duration()
-        return self.duration
-
-    def get_size(self):
-        return (self.width, self.height)
 class Timelines():
     def __init__(
@@ -233,6 +49,8 @@ class Timelines():
             self.cut_frames = []
             self.max_cut_len = 15000 # 10 minutes
             self.max_distance = 64 * math.sqrt(3 * pow(255, 2))
+        else:
+            self.cuts = []

         self.full_tile_w = 1920
         self.large_tile_w = 1500
@@ -253,7 +71,7 @@
         self.log = log
         if log:
             self.profiler = Profiler()
-            self.profiler.set_task('gst')
+            self.profiler.set_task('ffmpeg')

         ox.makedirs(self.tile_path)
@@ -274,7 +92,7 @@
             video_file, self.large_tile_h if self.render_video else 0,
             self.render_audio, self._video_callback, self._done_callback
         )
-        duration = video.get_duration() / 1e9
+        duration = video.get_duration()
         points = None
         if self.points:
             in_point = None
@@ -327,6 +145,7 @@
         self.frame_i = 0
         self.frame_offset = 0
+        self.log and self.profiler.set_task('ffmpeg')
         self.videos[0].decode(self.file_points[0])

         # remove tiles from previous run
@@ -346,8 +165,8 @@
         self.log and self.profiler.set_task('_video_callback()')
         '''
-        if timestamp != None and self.frame_i != int(round(timestamp / 1e9 * FPS)):
-            print 'WARNING: i is', self.frame_i, 'timestamp is', timestamp, '(', int(round(timestamp / 1e9 * FPS)), ')'
+        if timestamp != None and self.frame_i != int(round(timestamp * FPS)):
+            print 'WARNING: i is', self.frame_i, 'timestamp is', timestamp, '(', int(round(timestamp * FPS)), ')'
         '''
         self.is_last_frame = self.frame_i == self.frame_n - 1
         large_tile_x = self.frame_i % self.large_tile_w
@@ -441,7 +260,7 @@
         if self.render_antialias:
             self._save_full_tile_image('antialias')
         self.frame_i += 1
-        self.log and self.profiler.set_task('gst')
+        self.log and self.profiler.set_task('ffmpeg')

     def _render_keyframes(self):
         self.log and self.profiler.set_task('_render_keyframes() # keyframes timelines')
@@ -656,6 +475,7 @@
         if self.video_i < self.video_n - 1:
             self.video_i += 1
             self.frame_offset = self.frame_i
+            self.log and self.profiler.set_task('ffmpeg')
             self.videos[self.video_i].decode(self.file_points[self.video_i])
         else:
             if self.render_video:
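The only change Timelines sees in the decoder interface is the timestamp unit: the GStreamer pipeline reported nanoseconds, while the ffmpeg-based Video reports seconds, so get_duration() and the frame-index arithmetic drop their 1e9 factors. A sketch of the invariant behind the commented-out warning above, assuming 25 fps:

FPS = 25

def frame_index(timestamp):
    # replaces int(round(timestamp / 1e9 * FPS)) from the GStreamer version
    return int(round(timestamp * FPS))

# frame k arrives with timestamp k / FPS, in seconds
for k in range(100):
    assert frame_index(k / 25.0) == k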