cablegates/pandora/archive/extract.py
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
import os
from os.path import abspath, join, dirname, exists
import fractions
import subprocess
import sys
import shutil
import tempfile
import time
import re
import math
from glob import glob
import numpy as np
import Image
import ox
from ox.utils import json
img_extension = 'jpg'
FFMPEG2THEORA = 'ffmpeg2theora'
class AspectRatio(fractions.Fraction):
    def __new__(cls, numerator, denominator=None):
        if not denominator:
            ratio = map(int, numerator.split(':'))
            if len(ratio) == 1: ratio.append(1)
            numerator = ratio[0]
            denominator = ratio[1]
            #if it is close enough to a common aspect ratio, use that instead
            if abs(numerator/denominator - 4/3) < 0.03:
                numerator = 4
                denominator = 3
            elif abs(numerator/denominator - 16/9) < 0.02:
                numerator = 16
                denominator = 9
        return super(AspectRatio, cls).__new__(cls, numerator, denominator)

    @property
    def ratio(self):
        return "%d:%d" % (self.numerator, self.denominator)
def stream(video, target, profile, info):
    if not os.path.exists(target):
        fdir = os.path.dirname(target)
        if not os.path.exists(fdir):
            os.makedirs(fdir)
    dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
    '''
    WebM, look into:
        lag
        mb_static_threshold
        qmax/qmin
        rc_buf_aggressivity=0.95
        token_partitions=4
        level / speedlevel
        bt?
    H264: should bitrates be a bit lower? other stuff possible?
    '''
    profile, format = profile.split('.')
    if profile == '1080p':
        height = 1080
        audiorate = 48000
        audioquality = 6
        audiobitrate = None
        audiochannels = None
    elif profile == '720p':
        height = 720
        audiorate = 48000
        audioquality = 5
        audiobitrate = None
        audiochannels = None
    elif profile == '480p':
        height = 480
        audiorate = 44100
        audioquality = 2
        audiobitrate = None
        audiochannels = 2
    elif profile == '360p':
        height = 360
        audiorate = 44100
        audioquality = 1
        audiobitrate = None
        audiochannels = 1
    elif profile == '270p':
        height = 270
        audiorate = 44100
        audioquality = 0
        audiobitrate = None
        audiochannels = 1
    else:
        height = 96
        audiorate = 22050
        audioquality = -1
        audiobitrate = '22k'
        audiochannels = 1
    bpp = 0.17
    fps = AspectRatio(info['video'][0]['framerate'])
    width = int(dar * height)
    width += width % 2
    bitrate = height * width * fps * bpp / 1000
    aspect = dar.ratio
    #use 1:1 pixel aspect ratio if dar is close to that
    if abs(width/height - dar) < 0.02:
        aspect = '%s:%s' % (width, height)
    if info['audio']:
        audio_settings = ['-ar', str(audiorate), '-aq', str(audioquality)]
        if audiochannels and 'channels' in info['audio'][0] and info['audio'][0]['channels'] > audiochannels:
            audio_settings += ['-ac', str(audiochannels)]
        if audiobitrate:
            audio_settings += ['-ab', audiobitrate]
        if format == 'mp4':
            audio_settings += ['-acodec', 'libfaac']
        else:
            audio_settings += ['-acodec', 'libvorbis']
    else:
        audio_settings = ['-an']
    if info['video']:
        video_settings = [
            '-vb', '%dk' % bitrate, '-g', '%d' % int(fps * 2),
            '-s', '%dx%d' % (width, height),
            '-aspect', aspect,
        ]
        if format == 'mp4':
            video_settings += [
                '-vcodec', 'libx264',
                '-flags', '+loop+mv4',
                '-cmp', '256',
                '-partitions', '+parti4x4+parti8x8+partp4x4+partp8x8+partb8x8',
                '-me_method', 'hex',
                '-subq', '7',
                '-trellis', '1',
                '-refs', '5',
                '-bf', '3',
                '-flags2', '+bpyramid+wpred+mixed_refs+dct8x8',
                '-coder', '1',
                '-me_range', '16',
                '-keyint_min', '25', #FIXME: should this be related to fps?
                '-sc_threshold', '40',
                '-i_qfactor', '0.71',
                '-qmin', '10', '-qmax', '51',
                '-qdiff', '4'
            ]
    else:
        video_settings = ['-vn']
    ffmpeg = FFMPEG2THEORA.replace('2theora', '')
    cmd = [ffmpeg, '-y', '-threads', '2', '-i', video] \
          + audio_settings \
          + video_settings
    if format == 'mp4':
        cmd += ["%s.mp4" % target]
    else:
        cmd += ['-f', 'webm', target]
    print cmd
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
    p.communicate()
    if format == 'mp4':
        #move the moov atom to the front so the mp4 can start playing before it is fully downloaded
        cmd = ['qt-faststart', "%s.mp4" % target, target]
        print cmd
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
        p.communicate()
        os.unlink("%s.mp4" % target)
    return True
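# Usage sketch (hypothetical paths; `info` only needs the keys read above,
# e.g. as returned by a media-info probe):
# >>> info = {'video': [{'display_aspect_ratio': '16:9', 'framerate': '25:1'}],
# ...         'audio': [{'channels': 2}]}
# >>> stream('/tmp/in.avi', '/tmp/out/96p.webm', '96p.webm', info)
# True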
def run_command(cmd, timeout=10):
    p = subprocess.Popen(cmd, stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT)
    while timeout > 0:
        time.sleep(0.2)
        timeout -= 0.2
        if p.poll() is not None:
            return p.returncode
    if p.poll() is None:
        os.kill(p.pid, 9)
        killedpid, stat = os.waitpid(p.pid, os.WNOHANG)
    return p.returncode
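# Usage sketch: returns the command's exit code, or kills it after ~`timeout` seconds.
# >>> run_command(['true'])
# 0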
def frame(videoFile, frame, position, width=128, redo=False):
    '''
    params:
        videoFile  input
        frame      output
        position   as float in seconds
        width      of frame
        redo       boolean to extract file even if it exists
    '''
    if exists(videoFile):
        frameFolder = os.path.dirname(frame)
        if redo or not exists(frame):
            if not exists(frameFolder):
                os.makedirs(frameFolder)
            cmd = ['oxframe', '-i', videoFile, '-o', frame, '-p', str(position), '-x', str(width)]
            run_command(cmd)
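# Usage sketch (hypothetical paths): grab a 128px wide frame at 10.5s via the external `oxframe` tool.
# >>> frame('/tmp/in.avi', '/tmp/frames/10.500.jpg', 10.5)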
def resize_image(image_source, image_output, width=None, size=None):
    if exists(image_source):
        source = Image.open(image_source).convert('RGB')
        source_width = source.size[0]
        source_height = source.size[1]
        if size:
            if source_width > source_height:
                width = size
                height = int(width / (float(source_width) / source_height))
                height = height - height % 2
            else:
                height = size
                width = int(height * (float(source_width) / source_height))
                width = width - width % 2
        else:
            height = int(width / (float(source_width) / source_height))
            height = height - height % 2
        if width < source_width:
            resize_method = Image.ANTIALIAS
        else:
            resize_method = Image.BICUBIC
        output = source.resize((width, height), resize_method)
        output.save(image_output)
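# Usage sketch (hypothetical paths): pass `width` to fix the output width,
# or `size` to bound the larger dimension; the derived dimension is rounded
# down to an even number.
# >>> resize_image('/tmp/frame.png', '/tmp/frame.256.png', width=256)
# >>> resize_image('/tmp/poster.png', '/tmp/icon.png', size=64)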
def timeline(video, prefix):
    cmd = ['oxtimeline', '-i', video, '-o', prefix]
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    p.wait()
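# Usage sketch (hypothetical paths): the external `oxtimeline` tool writes
# timeline tiles, which the functions below glob as '<prefix>.64.*.png'.
# >>> timeline('/tmp/in.avi', '/tmp/timelines/timeline')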
#stats based on timeline images
def average_color(prefix):
    height = 64
    width = 1500
    frames = 0
    pixels = []
    color = np.asarray([0, 0, 0], dtype=np.float32)
    for image in sorted(glob("%s.%d.*.png" % (prefix, height))):
        timeline = Image.open(image)
        frames += timeline.size[0]
        p = np.asarray(timeline, dtype=np.float32)
        p = np.sum(p, axis=0) / height #average color per frame
        pixels.append(p)
    for i in range(0, len(pixels)):
        p = np.sum(pixels[i], axis=0) / frames
        color += p
    return list(map(float, color))
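# Usage sketch (hypothetical prefix, illustrative result): average RGB over
# all timeline tiles written by timeline() above.
# >>> average_color('/tmp/timelines/timeline')
# [96.3, 88.1, 79.5]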
def get_distance(rgb0, rgb1):
    dst = math.sqrt(pow(rgb0[0] - rgb1[0], 2) + pow(rgb0[1] - rgb1[1], 2) + pow(rgb0[2] - rgb1[2], 2))
    return dst / math.sqrt(3 * pow(255, 2))
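# The distance is normalized by sqrt(3 * 255^2), so identical colors give 0.0
# and black vs. white gives 1.0:
# >>> get_distance((0, 0, 0), (255, 255, 255))
# 1.0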
def cuts(prefix):
    cuts = []
    fps = 25
    frames = 0
    height = 64
    width = 1500
    pixels = []
    for image in sorted(glob("%s.%d.*.png" % (prefix, height))):
        timeline = Image.open(image)
        frames += timeline.size[0]
        pixels.append(timeline.load())
    for frame in range(0, frames):
        x = frame % width
        if frame > 0:
            dst = 0
            image0 = int((frame - 1) / width)
            image1 = int(frame / width)
            for y in range(0, height):
                rgb0 = pixels[image0][(x - 1) % width, y]
                rgb1 = pixels[image1][x, y]
                dst += get_distance(rgb0, rgb1) / height
            #print frame / fps, dst
            if dst > 0.1:
                cuts.append(frame / fps)
    return cuts
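# Usage sketch (hypothetical prefix, illustrative result): cut positions in
# seconds, assuming 25fps timeline tiles.
# >>> cuts('/tmp/timelines/timeline')
# [0.96, 4.2, 7.48]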
def divide(num, by):
    # >>> divide(100, 3)
    # [33, 33, 34]
    arr = []
    div = int(num / by)
    mod = num % by
    for i in range(int(by)):
        arr.append(div + (i > by - 1 - mod))
    return arr
def timeline_strip(movie, cuts, info, prefix):
    _debug = False
    duration = info['duration']
    video_height = info['video'][0]['height']
    video_width = info['video'][0]['width']
    video_ratio = video_width / video_height
    line_image = []
    timeline_height = 64
    timeline_width = 1500
    fps = 25
    frames = int(duration * fps)
    if cuts[0] != 0:
        cuts.insert(0, 0)
    #convert cut positions from seconds to frame numbers
    cuts = map(lambda x: int(round(x * fps)), cuts)
    for frame in range(frames):
        i = int(frame / timeline_width)
        x = frame % timeline_width
        if x == 0:
            #start a new tile; only the last tile can be narrower than 1500px
            timeline_width = min(timeline_width, frames - frame)
            timeline_image = Image.new('RGB', (timeline_width, timeline_height))
        if frame in cuts:
            c = cuts.index(frame)
            if c + 1 < len(cuts):
                duration = cuts[c + 1] - cuts[c]
                #number of stills needed so that no still is wider than one
                #full frame scaled down to the timeline height
                stills = math.ceil(duration / (video_width * timeline_height / video_height))
                widths = divide(duration, stills)
                still = frame
                if _debug:
                    print widths, duration, stills, cuts[c], cuts[c + 1]
                for s in range(int(stills)):
                    still_ratio = widths[s] / timeline_height
                    if video_ratio > still_ratio:
                        width = int(round(video_height * still_ratio))
                        left = int((video_width - width) / 2)
                        box = (left, 0, left + width, video_height)
                    else:
                        height = int(round(video_width / still_ratio))
                        top = int((video_height - height) / 2)
                        box = (0, top, video_width, top + height)
                    if _debug:
                        print frame, 'cut', c, 'still', s, still, 'width', widths[s], box
                    #FIXME: why does this have to be still+1?
                    frame_image = Image.open(movie.frame((still + 1) / fps))
                    frame_image = frame_image.crop(box).resize((widths[s], timeline_height), Image.ANTIALIAS)
                    for x_ in range(widths[s]):
                        line_image.append(frame_image.crop((x_, 0, x_ + 1, timeline_height)))
                    still += widths[s]
        if len(line_image) > frame:
            timeline_image.paste(line_image[frame], (x, 0))
        if x == timeline_width - 1:
            timeline_file = '%sstrip.64.%04d.png' % (prefix, i)
            if _debug:
                print 'writing', timeline_file
            timeline_image.save(timeline_file)
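# Usage sketch (hypothetical objects and paths): `movie` must provide a
# frame(position_in_seconds) method returning an image path, as used above;
# output tiles are written as '<prefix>strip.64.<nnnn>.png'.
# >>> info = {'duration': 120.0, 'video': [{'width': 1280, 'height': 720}]}
# >>> timeline_strip(movie, cuts('/tmp/timelines/timeline'), info, '/tmp/timelines/')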