# pandora/pandora/archive/extract.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement

import os
from os.path import abspath, join, dirname, exists

import fractions
import subprocess
import sys
import shutil
import tempfile
import time
import re
import math
from glob import glob

import numpy as np
import Image
import ox
from ox.utils import json


# file extension (without the dot) of the frame images written by frame()
img_extension = 'jpg'

# name of the ffmpeg2theora binary, stream() derives the ffmpeg binary name from it
FFMPEG2THEORA = 'ffmpeg2theora'

class AspectRatio(fractions.Fraction):
    '''
        Fraction that can also be created from a "numerator:denominator"
        string, e.g. AspectRatio("16:9"). A string without ":" is read as
        "value:1".

        Ratios parsed from a string that are close to 4:3 (within 0.03) or
        16:9 (within 0.02) are replaced by exactly that ratio.

        Anything that is not a string, or that comes with an explicit
        denominator, is passed to fractions.Fraction unchanged.
    '''
    def __new__(cls, numerator, denominator=None):
        if not denominator and hasattr(numerator, 'split'):
            #build a real list, the result is indexed and may be appended to
            ratio = [int(part) for part in numerator.split(':')]
            if len(ratio) == 1:
                ratio.append(1)
            numerator, denominator = ratio[0], ratio[1]
            #float() keeps this a true division even without
            #"from __future__ import division"
            value = float(numerator) / denominator
            #if its close enough to the common aspect ratios rather use that
            if abs(value - 4.0 / 3) < 0.03:
                numerator, denominator = 4, 3
            elif abs(value - 16.0 / 9) < 0.02:
                numerator, denominator = 16, 9
        return super(AspectRatio, cls).__new__(cls, numerator, denominator)

    @property
    def ratio(self):
        '''
            the fraction as "numerator:denominator" string
        '''
        return "%d:%d" % (self.numerator, self.denominator)

def stream(video, target, profile, info):
    '''
        Encode video to target with ffmpeg, as WebM or MP4.

        params:
            video   path of the source file
            target  path of the encoded file, a missing parent folder
                    is created
            profile "<resolution>.<format>", e.g. "480p.webm" or "720p.mp4";
                    known resolutions are 1080p, 720p, 480p, 360p and 270p,
                    anything else is encoded with the 96 pixel settings;
                    any format other than "mp4" is encoded as WebM
            info    dict with 'video' and 'audio' lists; read from it are
                    display_aspect_ratio and framerate of the first video
                    stream and, if present, channels of the first audio
                    stream

        mp4 output is first written to "<target>.mp4", qt-faststart then
        writes target from it and the temporary file is removed.
    '''
    #   WebM look into
    #       lag
    #       mb_static_threshold
    #       qmax/qmin
    #       rc_buf_aggressivity=0.95
    #       token_partitions=4
    #       level / speedlevel
    #       bt?
    #   H264, should bitrates be a bit lower? other stuff possible?
    if not os.path.exists(target):
        fdir = os.path.dirname(target)
        #dirname is '' for a bare filename and os.makedirs('') fails
        if fdir and not os.path.exists(fdir):
            os.makedirs(fdir)

    profile, container = profile.split('.')

    #resolution: (height, audiorate, audioquality, audiobitrate, audiochannels)
    #a lookup table, so that every known profile gets its own settings
    profiles = {
        '1080p': (1080, 48000, 6, None, None),
        '720p': (720, 48000, 5, None, None),
        '480p': (480, 44100, 2, None, 2),
        '360p': (360, 44100, 1, None, 1),
        '270p': (270, 44100, 0, None, 1),
    }
    fallback = (96, 22050, -1, '22k', 1)
    height, audiorate, audioquality, audiobitrate, audiochannels = \
        profiles.get(profile, fallback)

    if info['audio']:
        audio_settings = ['-ar', str(audiorate), '-aq', str(audioquality)]
        #only ever reduce the number of channels
        if audiochannels and 'channels' in info['audio'][0] \
            and info['audio'][0]['channels'] > audiochannels:
            audio_settings += ['-ac', str(audiochannels)]
        if audiobitrate:
            audio_settings += ['-ab', audiobitrate]
        if container == 'mp4':
            audio_settings += ['-acodec', 'libfaac']
        else:
            audio_settings += ['-acodec', 'libvorbis']
    else:
        audio_settings = ['-an']

    if info['video']:
        #only look at the video stream once we know there is one
        dar = AspectRatio(info['video'][0]['display_aspect_ratio'])
        fps = AspectRatio(info['video'][0]['framerate'])
        bpp = 0.17

        width = int(dar * height)
        width += width % 2          #round up to an even width

        bitrate = height*width*fps*bpp/1000
        aspect = dar.ratio
        #use 1:1 pixel aspect ratio if dar is close to that
        if abs(float(width)/height - dar) < 0.02:
            aspect = '%s:%s' % (width, height)

        video_settings = [
            '-vb', '%dk'%bitrate, '-g', '%d' % int(fps*2),
            '-s', '%dx%d'%(width, height),
            '-aspect', aspect,
        ]
        if container == 'mp4':
            video_settings += [
                '-vcodec', 'libx264',
                '-flags', '+loop+mv4',
                '-cmp', '256',
                '-partitions', '+parti4x4+parti8x8+partp4x4+partp8x8+partb8x8',
                '-me_method', 'hex',
                '-subq', '7',
                '-trellis', '1',
                '-refs', '5',
                '-bf', '3',
                '-flags2', '+bpyramid+wpred+mixed_refs+dct8x8',
                '-coder', '1',
                '-me_range', '16',
                '-keyint_min', '25', #FIXME: should this be related to fps?
                '-sc_threshold','40',
                '-i_qfactor', '0.71',
                '-qmin', '10', '-qmax', '51',
                '-qdiff', '4'
            ]
    else:
        video_settings = ['-vn']

    ffmpeg = FFMPEG2THEORA.replace('2theora', '')
    cmd = [ffmpeg, '-y', '-threads', '2', '-i', video] \
          + audio_settings \
          + video_settings

    encoded = "%s.mp4" % target
    if container == 'mp4':
        cmd += [encoded]
    else:
        cmd += ['-f','webm', target]

    print(cmd)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    #communicate() drains the pipes, wait() alone can block forever once
    #the encoder has filled the pipe buffer with its output
    p.communicate()
    if container == 'mp4':
        cmd = ['qt-faststart', encoded, target]
        print(cmd)
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.communicate()
        #the temporary file is missing if the encoder failed
        if os.path.exists(encoded):
            os.unlink(encoded)

def run_command(cmd, timeout=10):
    '''
        Run cmd and wait at most timeout seconds for it to finish.

        params:
            cmd     list with the program and its arguments
            timeout seconds after which the process gets killed

        Output of the command is discarded.
        Returns the exit status of the process; for a process that had to
        be killed this is the status it was reaped with (negative signal
        number on POSIX).
    '''
    #send output to devnull, nobody reads it and an unread pipe
    #blocks the command once the pipe buffer is full
    with open(os.devnull, 'wb') as devnull:
        p = subprocess.Popen(cmd, stdout=devnull, stderr=devnull)
        #compare against the clock, subtracting the sleep interval from
        #a float drifts and ignores the time spent outside of sleep
        deadline = time.time() + timeout
        while p.poll() is None:
            if time.time() >= deadline:
                p.kill()
                #reap the killed process so no zombie is left behind
                p.wait()
                break
            time.sleep(0.05)
    return p.returncode

def frame(videoFile, position, baseFolder, width=128, redo=False):
    '''
        Extract a frame with oggThumb and return the path of the image.

        params:
            videoFile
            position as float in seconds
            baseFolder to write frames to
            width of frame
            redo boolean to extract file even if it exists

        The frame is extracted 320 pixels wide, other widths are resized
        from that image. The returned path is not checked, the file is
        missing if the extraction failed.
    '''
    def frame_path(size):
        return os.path.join(baseFolder, "%s.%s.%s" % (ox.ms2time(position*1000), size, img_extension))

    #not using input file, too slow to extract frame right now
    base_size = 320
    frame = frame_path(base_size)

    if exists(videoFile):
        if redo or not exists(frame):
            if not exists(baseFolder):
                os.makedirs(baseFolder)
            cmd = ['oggThumb', '-t', str(position), '-n', frame, '-s', '%dx0'%base_size, videoFile]
            run_command(cmd)
    if width != base_size:
        frame_base = frame
        frame = frame_path(width)
        #redo has to replace the resized image too, otherwise an old
        #version of it is returned for the new base frame
        if redo or not exists(frame):
            resize_image(frame_base, frame, width)
    return frame

def resize_image(image_source, image_output, width=None, size=None):
    '''
        Write a resized copy of image_source to image_output.

        params:
            image_source path of the existing image
            image_output path of the resized image
            width        width of the output, height follows from the
                         aspect ratio of the source (used if size is not set)
            size         length of the longer side of the output,
                         takes precedence over width

        The computed side is rounded down to an even number.
        Nothing is written if image_source does not exist.
    '''
    if not exists(image_source):
        return

    source = Image.open(image_source)
    source_width, source_height = source.size[0], source.size[1]
    aspect = float(source_width) / source_height

    if size and source_width > source_height:
        #landscape: size is the width
        width = size
        height = int(width / aspect)
        height -= height % 2
    elif size:
        #portrait or square: size is the height
        height = size
        width = int(height * aspect)
        width -= width % 2
    else:
        height = int(width / aspect)
        height -= height % 2

    #antialias when shrinking, bicubic otherwise
    resize_method = Image.BICUBIC
    if width < source_width:
        resize_method = Image.ANTIALIAS
    source.resize((width, height), resize_method).save(image_output)

def timeline(video, prefix):
    '''
        Run oxtimeline on video, prefix is passed as its -o option.
        Blocks until the command is done, its output is discarded.
    '''
    cmd = ['oxtimeline', '-i', video, '-o', prefix]
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    #communicate() drains the pipes, wait() alone can block forever once
    #the command has filled the pipe buffer with its output
    p.communicate()

#stats based on timeline images
def average_color(prefix):
    '''
        Average color of the timeline images "<prefix>.64.*.png".

        Returns a list with three values, [0, 0, 0] if no image matches.
        assumes the images are 64 pixel high RGB images - TODO confirm
    '''
    height = 64
    columns = 0
    strips = []
    for path in sorted(glob("%s.%d.*.png" % (prefix, height))):
        image = Image.open(path)
        columns += image.size[0]
        data = np.asarray(image, dtype=np.float32)
        #collapse the rows: average color of each column
        strips.append(np.sum(data, axis=0) / height)

    #every column is divided by the total number of columns, so the sum
    #over all images is the average over the whole timeline
    color = np.asarray([0, 0, 0], dtype=np.float32)
    for strip in strips:
        color += np.sum(strip, axis=0) / columns
    return list(color)

def get_distance(rgb0, rgb1):
    '''
        Euclidean distance between two colors, as a value between
        0 (same color) and 1 (black and white).

        params:
            rgb0, rgb1 sequences with at least three values each,
                       index 0 to 2 are used; the result only stays
                       within 0 and 1 for values from 0 to 255
    '''
    #compare all three channels, not just the first one
    dst = math.sqrt(sum(pow(rgb0[i] - rgb1[i], 2) for i in range(3)))
    #longest possible distance, from (0, 0, 0) to (255, 255, 255)
    return dst / math.sqrt(3 * pow(255, 2))

def cuts(prefix):
    '''
        Find cuts in the timeline images "<prefix>.64.*.png".

        Neighbouring pixel columns are compared row by row with
        get_distance(), a column whose average distance to the column
        before it is above 0.1 is a cut.

        Returns a list with the position of each cut, that is the index of
        the column divided by 25.
        assumes one column per frame at 25 fps and 1500 columns per image,
        only the last image may be narrower - TODO confirm
    '''
    height = 64
    width = 1500
    fps = 25

    columns = 0
    strips = []
    for path in sorted(glob("%s.%d.*.png" % (prefix, height))):
        image = Image.open(path)
        columns += image.size[0]
        strips.append(image.load())

    positions = []
    #the first column has nothing to be compared with
    for frame in range(1, columns):
        current, x = divmod(frame, width)
        previous = (frame - 1) // width
        #wraps around to the last column of the previous image
        previous_x = (x - 1) % width
        dst = 0
        for y in range(height):
            before = strips[previous][previous_x, y]
            after = strips[current][x, y]
            dst += get_distance(before, after) / height
        if dst > 0.1:
            positions.append(frame / fps)
    return positions
add extract 2010-09-04 12:59:09 +00:00			`# -*- coding: utf-8 -*-`
			`# vi:si:et:sw=4:sts=4:ts=4`
			`from __future__ import division, with_statement`

			`import os`
			`from os.path import abspath, join, dirname, exists`

			`import fractions`
			`import subprocess`
			`import sys`
			`import shutil`
			`import tempfile`
			`import time`
			`import re`
			`import math`
			`from glob import glob`

			`import numpy as np`
			`import Image`
			`import ox`
			`from ox.utils import json`


			`img_extension='jpg'`

			`FFMPEG2THEORA = 'ffmpeg2theora'`

			`class AspectRatio(fractions.Fraction):`
			`def __new__(cls, numerator, denominator=None):`
			`if not denominator:`
			`ratio = map(int, numerator.split(':'))`
			`if len(ratio) == 1: ratio.append(1)`
			`numerator = ratio[0]`
			`denominator = ratio[1]`
			`#if its close enough to the common aspect ratios rather use that`
			`if abs(numerator/denominator - 4/3) < 0.03:`
			`numerator = 4`
			`denominator = 3`
			`elif abs(numerator/denominator - 16/9) < 0.02:`
			`numerator = 16`
			`denominator = 9`
			`return super(AspectRatio, cls).__new__(cls, numerator, denominator)`

			`@property`
			`def ratio(self):`
			`return "%d:%d" % (self.numerator, self.denominator)`

			`def stream(video, target, profile, info):`
			`if not os.path.exists(target):`
			`fdir = os.path.dirname(target)`
			`if not os.path.exists(fdir):`
			`os.makedirs(fdir)`

			`dar = AspectRatio(info['video'][0]['display_aspect_ratio'])`
			`'''`
			`WebM look into`
			`lag`
			`mb_static_threshold`
			`qmax/qmin`
			`rc_buf_aggressivity=0.95`
			`token_partitions=4`
			`level / speedlevel`
			`bt?`
			`H264, should bitrates be a bit lower? other stuff possible?`
			`'''`
			`profile, format = profile.split('.')`

			`if profile == '1080p':`
			`height = 1080`

			`audiorate = 48000`
			`audioquality = 6`
			`audiobitrate = None`
			`audiochannels = None`
			`if profile == '720p':`
			`height = 720`

			`audiorate = 48000`
			`audioquality = 5`
			`audiobitrate = None`
			`audiochannels = None`
			`if profile == '480p':`
			`height = 480`

			`audiorate = 44100`
			`audioquality = 2`
			`audiobitrate = None`
			`audiochannels = 2`
			`elif profile == '360p':`
			`height = 360`

			`audiorate = 44100`
			`audioquality = 1`
			`audiobitrate = None`
			`audiochannels = 1`
			`elif profile == '270p':`
			`height = 270`

			`audiorate = 44100`
			`audioquality = 0`
			`audiobitrate = None`
			`audiochannels = 1`
			`else:`
			`height = 96`

			`audiorate = 22050`
			`audioquality = -1`
			`audiobitrate = '22k'`
			`audiochannels = 1`

			`bpp = 0.17`
			`fps = AspectRatio(info['video'][0]['framerate'])`

			`width = int(dar * height)`
			`width += width % 2`

			`bitrate = height*width*fps*bpp/1000`
			`aspect = dar.ratio`
			`#use 1:1 pixel aspect ratio if dar is close to that`
			`if abs(width/height - dar) < 0.02:`
			`aspect = '%s:%s' % (width, height)`

			`if info['audio']:`
			`audio_settings = ['-ar', str(audiorate), '-aq', str(audioquality)]`
			`if audiochannels and 'channels' in info['audio'][0] and info['audio'][0]['channels'] > audiochannels:`
			`audio_settings += ['-ac', str(audiochannels)]`
			`if audiobitrate:`
			`audio_settings += ['-ab', audiobitrate]`
			`if format == 'mp4':`
			`audio_settings += ['-acodec', 'libfaac']`
			`else:`
			`audio_settings += ['-acodec', 'libvorbis']`
			`else:`
			`audio_settings = ['-an']`

			`if info['video']:`
			`video_settings = [`
			`'-vb', '%dk'%bitrate, '-g', '%d' % int(fps*2),`
			`'-s', '%dx%d'%(width, height),`
			`'-aspect', aspect,`
			`]`
			`if format == 'mp4':`
			`video_settings += [`
			`'-vcodec', 'libx264',`
			`'-flags', '+loop+mv4',`
			`'-cmp', '256',`
			`'-partitions', '+parti4x4+parti8x8+partp4x4+partp8x8+partb8x8',`
			`'-me_method', 'hex',`
			`'-subq', '7',`
			`'-trellis', '1',`
			`'-refs', '5',`
			`'-bf', '3',`
			`'-flags2', '+bpyramid+wpred+mixed_refs+dct8x8',`
			`'-coder', '1',`
			`'-me_range', '16',`
			`'-keyint_min', '25', #FIXME: should this be related to fps?`
			`'-sc_threshold','40',`
			`'-i_qfactor', '0.71',`
			`'-qmin', '10', '-qmax', '51',`
			`'-qdiff', '4'`
			`]`
			`else:`
			`video_settings = ['-vn']`

			`ffmpeg = FFMPEG2THEORA.replace('2theora', '')`
			`cmd = [ffmpeg, '-y', '-threads', '2', '-i', video] \`
			`+ audio_settings \`
			`+ video_settings`

			`if format == 'mp4':`
			`cmd += ["%s.mp4"%target]`
			`else:`
			`cmd += ['-f','webm', target]`

			`print cmd`
			`p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`p.wait()`
			`if format == 'mp4':`
			`cmd = ['qt-faststart', "%s.mp4"%target, target]`
			`print cmd`
			`p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`p.wait()`
			`os.unlink("%s.mp4"%target)`

			`def run_command(cmd, timeout=10):`
			`p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`while timeout > 0:`
			`time.sleep(0.2)`
			`timeout -= 0.2`
			`if p.poll() != None:`
			`return p.returncode`
			`if p.poll() == None:`
			`os.kill(p.pid, 9)`
			`killedpid, stat = os.waitpid(p.pid, os.WNOHANG)`
			`return p.returncode`

			`def frame(videoFile, position, baseFolder, width=128, redo=False):`
			`'''`
			`params:`
			`videoFile`
			`position as float in seconds`
			`baseFolder to write frames to`
			`width of frame`
			`redo boolean to extract file even if it exists`
			`'''`
			`def frame_path(size):`
			`return os.path.join(baseFolder, "%s.%s.%s" % (ox.ms2time(position*1000), size, img_extension))`

			`#not using input file, to slow to extract frame right now`
			`base_size = 320`
			`frame = frame_path(base_size)`

			`if exists(videoFile):`
			`if redo or not exists(frame):`
			`if not exists(baseFolder):`
			`os.makedirs(baseFolder)`
			`cmd = ['oggThumb', '-t', str(position), '-n', frame, '-s', '%dx0'%base_size, videoFile]`
			`run_command(cmd)`
			`if width != base_size:`
			`frame_base = frame`
			`frame = frame_path(width)`
			`if not exists(frame):`
			`resize_image(frame_base, frame, width)`
			`return frame`

add poster/movieId.size.jpg 2010-09-07 14:05:38 +00:00			`def resize_image(image_source, image_output, width=None, size=None):`
add extract 2010-09-04 12:59:09 +00:00			`if exists(image_source):`
			`source = Image.open(image_source)`
			`source_width = source.size[0]`
			`source_height = source.size[1]`
add poster/movieId.size.jpg 2010-09-07 14:05:38 +00:00			`if size:`
			`if source_width > source_height:`
			`width = size`
			`height = int(width / (float(source_width) / source_height))`
			`height = height - height % 2`
			`else:`
			`height = size`
			`width = int(height * (float(source_width) / source_height))`
			`width = width - width % 2`

			`else:`
			`height = int(width / (float(source_width) / source_height))`
			`height = height - height % 2`
add extract 2010-09-04 12:59:09 +00:00
			`if width < source_width:`
			`resize_method = Image.ANTIALIAS`
			`else:`
			`resize_method = Image.BICUBIC`
			`output = source.resize((width, height), resize_method)`
			`output.save(image_output)`

			`def timeline(video, prefix):`
			`cmd = ['oxtimeline', '-i', video, '-o', prefix]`
			`p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`p.wait()`

			`#stats based on timeline images`
			`def average_color(prefix):`
			`height = 64`
			`width = 1500`
			`frames = 0`
			`pixels = []`
			`color = np.asarray([0, 0, 0], dtype=np.float32)`

			`for image in sorted(glob("%s.%d.*.png" % (prefix, height))):`
			`timeline = Image.open(image)`
			`frames += timeline.size[0]`
			`p = np.asarray(timeline, dtype=np.float32)`
			`p = np.sum(p, axis=0) / height #average color per frame`
			`pixels.append(p)`

			`for i in range(0, len(pixels)):`
			`p = np.sum(pixels[i], axis=0) / frames`
			`color += p`
return average color 2010-09-04 13:41:57 +00:00			`return list(color)`
add extract 2010-09-04 12:59:09 +00:00
			`def get_distance(rgb0, rgb1):`
			`dst = math.sqrt(pow(rgb0[0] - rgb1[0], 2) + pow(rgb0[0] - rgb1[0], 2) + pow(rgb0[0] - rgb1[0], 2))`
			`return dst / math.sqrt(3 * pow(255, 2))`

			`def cuts(prefix):`
			`cuts = []`
			`fps = 25`
			`frames = 0`
			`height = 64`
			`width = 1500`
			`pixels = []`
			`for image in sorted(glob("%s.%d.*.png" % (prefix, height))):`
			`timeline = Image.open(image)`
			`frames += timeline.size[0]`
			`pixels.append(timeline.load())`
			`for frame in range(0, frames):`
			`x = frame % width`
			`if frame > 0:`
			`dst = 0`
			`image0 = int((frame - 1) / width)`
			`image1 = int(frame / width)`
			`for y in range(0, height):`
			`rgb0 = pixels[image0][(x - 1) % width, y]`
			`rgb1 = pixels[image1][x, y]`
			`dst += get_distance(rgb0, rgb1) / height`
			`#print frame / fps, dst`
			`if dst > 0.1:`
			`cuts.append(frame / fps)`
			`return cuts`