cablegates/pandora/archive/extract.py

413 lines
13 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
import os
from os.path import abspath, join, dirname, exists
import fractions
import subprocess
import sys
import shutil
import tempfile
import time
import re
import math
from glob import glob
import numpy as np
import Image
import ox
from ox.utils import json
img_extension='jpg'
FFMPEG2THEORA = 'ffmpeg2theora'
class AspectRatio(fractions.Fraction):
def __new__(cls, numerator, denominator=None):
if not denominator:
ratio = map(int, numerator.split(':'))
if len(ratio) == 1: ratio.append(1)
numerator = ratio[0]
denominator = ratio[1]
#if its close enough to the common aspect ratios rather use that
if abs(numerator/denominator - 4/3) < 0.03:
numerator = 4
denominator = 3
elif abs(numerator/denominator - 16/9) < 0.02:
numerator = 16
denominator = 9
return super(AspectRatio, cls).__new__(cls, numerator, denominator)
@property
def ratio(self):
return "%d:%d" % (self.numerator, self.denominator)
def stream(video, target, profile, info):
if not os.path.exists(target):
fdir = os.path.dirname(target)
if not os.path.exists(fdir):
os.makedirs(fdir)
dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
'''
WebM look into
lag
mb_static_threshold
qmax/qmin
rc_buf_aggressivity=0.95
token_partitions=4
level / speedlevel
bt?
H264, should bitrates be a bit lower? other stuff possible?
'''
profile, format = profile.split('.')
if profile == '1080p':
height = 1080
audiorate = 48000
audioquality = 6
audiobitrate = None
audiochannels = None
if profile == '720p':
height = 720
audiorate = 48000
audioquality = 5
audiobitrate = None
audiochannels = None
if profile == '480p':
height = 480
audiorate = 44100
audioquality = 2
audiobitrate = None
audiochannels = 2
elif profile == '360p':
height = 360
audiorate = 44100
audioquality = 1
audiobitrate = None
audiochannels = 1
elif profile == '270p':
height = 270
audiorate = 44100
audioquality = 0
audiobitrate = None
audiochannels = 1
elif profile == '240p':
height = 240
audiorate = 44100
audioquality = 0
audiobitrate = None
audiochannels = 1
else:
height = 96
audiorate = 22050
audioquality = -1
audiobitrate = '22k'
audiochannels = 1
bpp = 0.17
fps = AspectRatio(info['video'][0]['framerate'])
width = int(dar * height)
width += width % 2
bitrate = height*width*fps*bpp/1000
aspect = dar.ratio
#use 1:1 pixel aspect ratio if dar is close to that
if abs(width/height - dar) < 0.02:
aspect = '%s:%s' % (width, height)
if info['audio']:
audio_settings = ['-ar', str(audiorate), '-aq', str(audioquality)]
if audiochannels and 'channels' in info['audio'][0] and info['audio'][0]['channels'] > audiochannels:
audio_settings += ['-ac', str(audiochannels)]
if audiobitrate:
audio_settings += ['-ab', audiobitrate]
if format == 'mp4':
audio_settings += ['-acodec', 'libfaac']
else:
audio_settings += ['-acodec', 'libvorbis']
else:
audio_settings = ['-an']
if info['video']:
video_settings = [
'-vb', '%dk'%bitrate, '-g', '%d' % int(fps*2),
'-s', '%dx%d'%(width, height),
'-aspect', aspect,
]
if format == 'mp4':
#quicktime does not support bpyramid
'''
video_settings += [
'-vcodec', 'libx264',
'-flags', '+loop+mv4',
'-cmp', '256',
'-partitions', '+parti4x4+parti8x8+partp4x4+partp8x8+partb8x8',
'-me_method', 'hex',
'-subq', '7',
'-trellis', '1',
'-refs', '5',
'-bf', '3',
'-flags2', '+bpyramid+wpred+mixed_refs+dct8x8',
'-coder', '1',
'-me_range', '16',
'-keyint_min', '25', #FIXME: should this be related to fps?
'-sc_threshold','40',
'-i_qfactor', '0.71',
'-qmin', '10', '-qmax', '51',
'-qdiff', '4'
]
'''
video_settings += [
'-vcodec', 'libx264',
'-flags', '+loop+mv4',
'-cmp', '256',
'-partitions', '+parti4x4+parti8x8+partp4x4+partp8x8+partb8x8',
'-me_method', 'hex',
'-subq', '7',
'-trellis', '1',
'-refs', '5',
'-bf', '0',
'-flags2', '+mixed_refs',
'-coder', '0',
'-me_range', '16',
'-g', '250', #FIXME: should this be related to fps?
'-keyint_min', '25', #FIXME: should this be related to fps?
'-sc_threshold','40',
'-i_qfactor', '0.71',
'-qmin', '10', '-qmax', '51',
'-qdiff', '4'
]
else:
video_settings = ['-vn']
ffmpeg = FFMPEG2THEORA.replace('2theora', '')
cmd = [ffmpeg, '-y', '-threads', '2', '-i', video] \
+ audio_settings \
+ video_settings
if format == 'mp4':
cmd += ["%s.mp4"%target]
else:
cmd += ['-f','webm', target]
print cmd
p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
p.communicate()
if format == 'mp4':
cmd = ['qt-faststart', "%s.mp4"%target, target]
print cmd
p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
p.communicate()
os.unlink("%s.mp4"%target)
return True
def run_command(cmd, timeout=10):
#print cmd
p = subprocess.Popen(cmd, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
while timeout > 0:
time.sleep(0.2)
timeout -= 0.2
if p.poll() != None:
return p.returncode
if p.poll() == None:
os.kill(p.pid, 9)
killedpid, stat = os.waitpid(p.pid, os.WNOHANG)
return p.returncode
def frame(videoFile, frame, position, width=128, redo=False):
'''
params:
videoFile input
frame output
position as float in seconds
width of frame
redo boolean to extract file even if it exists
'''
if exists(videoFile):
frameFolder = os.path.dirname(frame)
if redo or not exists(frame):
if not exists(frameFolder):
os.makedirs(frameFolder)
cmd = ['oxframe', '-i', videoFile, '-o', frame, '-p', str(position), '-x', str(width)]
run_command(cmd)
def resize_image(image_source, image_output, width=None, size=None):
if exists(image_source):
source = Image.open(image_source).convert('RGB')
source_width = source.size[0]
source_height = source.size[1]
if size:
if source_width > source_height:
width = size
height = int(width / (float(source_width) / source_height))
height = height - height % 2
else:
height = size
width = int(height * (float(source_width) / source_height))
width = width - width % 2
else:
height = int(width / (float(source_width) / source_height))
height = height - height % 2
if width < source_width:
resize_method = Image.ANTIALIAS
else:
resize_method = Image.BICUBIC
output = source.resize((width, height), resize_method)
output.save(image_output)
def timeline(video, prefix):
cmd = ['oxtimeline', '-i', video, '-o', prefix]
p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.wait()
#stats based on timeline images
def average_color(prefix):
height = 64
width = 1500
frames = 0
pixels = []
color = np.asarray([0, 0, 0], dtype=np.float32)
for image in sorted(glob("%s.%d.*.png" % (prefix, height))):
timeline = Image.open(image)
frames += timeline.size[0]
p = np.asarray(timeline, dtype=np.float32)
p = np.sum(p, axis=0) / height #average color per frame
pixels.append(p)
for i in range(0, len(pixels)):
p = np.sum(pixels[i], axis=0) / frames
color += p
return list(map(float, color))
def get_distance(rgb0, rgb1):
dst = math.sqrt(pow(rgb0[0] - rgb1[0], 2) + pow(rgb0[0] - rgb1[0], 2) + pow(rgb0[0] - rgb1[0], 2))
return dst / math.sqrt(3 * pow(255, 2))
def cuts(prefix):
cuts = []
fps = 25
frames = 0
height = 64
width = 1500
pixels = []
for image in sorted(glob("%s.%d.*.png" % (prefix, height))):
timeline = Image.open(image)
frames += timeline.size[0]
pixels.append(timeline.load())
for frame in range(0, frames):
x = frame % width
if frame > 0:
dst = 0
image0 = int((frame - 1) / width)
image1 = int(frame / width)
for y in range(0, height):
rgb0 = pixels[image0][(x - 1) % width, y]
rgb1 = pixels[image1][x, y]
dst += get_distance(rgb0, rgb1) / height
#print frame / fps, dst
if dst > 0.1:
cuts.append(frame / fps)
return cuts
def divide(num, by):
# >>> divide(100, 3)
# [33, 33, 34]
arr = []
div = int(num / by)
mod = num % by
for i in range(int(by)):
arr.append(div + (i > by - 1 - mod))
return arr
def timeline_strip(item, cuts, info, prefix):
_debug = False
duration = info['duration']
video_height = info['video'][0]['height']
video_width = info['video'][0]['width']
video_ratio = video_width / video_height
line_image = []
timeline_height = 64
timeline_width = 1500
fps = 25
frames = int(duration * fps)
if cuts[0] != 0:
cuts.insert(0, 0)
cuts = map(lambda x: int(round(x * fps)), cuts)
for frame in range(frames):
i = int(frame / timeline_width)
x = frame % timeline_width
if x == 0:
timeline_width = min(timeline_width, frames - frame)
timeline_image = Image.new('RGB', (timeline_width, timeline_height))
if frame in cuts:
c = cuts.index(frame)
if c +1 < len(cuts):
duration = cuts[c + 1] - cuts[c]
frames = math.ceil(duration / (video_width * timeline_height / video_height))
widths = divide(duration, frames)
frame = frame
if _debug:
print widths, duration, frames, cuts[c], cuts[c + 1]
for s in range(int(frames)):
frame_ratio = widths[s] / timeline_height
if video_ratio > frame_ratio:
width = int(round(video_height * frame_ratio))
left = int((video_width - width) / 2)
box = (left, 0, left + width, video_height)
else:
height = int(round(video_width / frame_ratio))
top = int((video_height - height) / 2)
box = (0, top, video_width, top + height)
if _debug:
print frame, 'cut', c, 'frame', s, frame, 'width', widths[s], box
#FIXME: why does this have to be frame+1?
frame_image = Image.open(item.frame((frame+1)/fps))
frame_image = frame_image.crop(box).resize((widths[s], timeline_height), Image.ANTIALIAS)
for x_ in range(widths[s]):
line_image.append(frame_image.crop((x_, 0, x_ + 1, timeline_height)))
frame += widths[s]
if len(line_image) > frame:
timeline_image.paste(line_image[frame], (x, 0))
if x == timeline_width - 1:
timeline_file = '%sstrip.64.%04d.png' % (prefix, i)
if _debug:
print 'writing', timeline_file
timeline_image.save(timeline_file)
def chop(response, video, start, end):
if end <= start:
return ''
t = end - start
cmd = [
'ffmpeg',
'-y',
'-i', video,
'-ss', '%.3f'%start,
'-t','%.3f'%t,
'-vcodec', 'copy',
'-acodec', 'copy',
'-f', 'webm',
'/dev/stdout'
]
p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=open('/dev/null', 'w'))
stdout, stderr = p.communicate()
return stdout