python-ox/ox/file.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
from __future__ import division, with_statement
import os
import hashlib
import re
import shutil
import struct
import subprocess
import sqlite3

from ox.utils import json

__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']

# Known file extensions (lowercase, without the leading dot), grouped by
# media type.
EXTENSIONS = {
    'audio': [
        'aac',
        'aif',
        'aiff',
        'flac',
        'm4a',
        'mp3',
        'oga',
        'ogg',
        'wav',
        'wma',
    ],
    'image': [
        'bmp',
        'gif',
        'jpeg',
        'jpg',
        'png',
        'svg',
        'webp',
    ],
    'subtitle': [
        'idx',
        'srt',
        'sub',
    ],
    'video': [
        '3gp',
        'avi',
        'divx',
        'dv',
        'flv',
        'm2t',
        'm4v',
        'mkv',
        'mov',
        'mp4',
        'mpeg',
        'mpg',
        'mts',
        'ogm',
        'ogv',
        'rm',
        'vob',
        'webm',
        'wmv',
        # http://en.wikipedia.org/wiki/MOD_and_TOD
        'mod',
        'tod',
        'mxf',
        'ts',
    ],
}

def cmd(program):
    """Return the command to use for *program*.

    If a file named *program* exists under ``~/.ox/bin/``, its expanded path
    is returned; otherwise *program* is returned unchanged.
    """
    candidate = os.path.expanduser('~/.ox/bin/%s' % program)
    return candidate if os.path.exists(candidate) else program

def _get_file_cache():
    """Return the path of ``files.sqlite`` inside the ox cache directory.

    The directory comes from ``ox.cache.cache_path()``; a leading ``fs:``
    prefix on that value is stripped before the file name is appended.
    """
    # ox.cache is imported at call time rather than at module level.
    import ox.cache
    prefix = 'fs:'
    base = ox.cache.cache_path()
    if base.startswith(prefix):
        base = base[len(prefix):]
    return os.path.join(base, 'files.sqlite')

def cache(filename, type='oshash'):
    """Return a cached per-file value, computing and storing it on a miss.

    Values live in the sqlite database named by ``_get_file_cache()``, one
    row per path. A row is only trusted while its stored ``size`` and
    ``mtime`` (whole seconds) still match ``os.stat(filename)``.

    filename -- path of the file; also the cache key
    type     -- column to return: 'oshash', 'sha1' or 'info'. 'info' is
                stored as JSON text and returned decoded.

    On a miss the requested value is computed with the uncached variant of
    ``oshash``/``sha1sum``/``avinfo`` and the row is rewritten, keeping the
    other columns of a row that is still valid for the file.

    Raises ValueError if a value has to be computed and *type* is not one of
    the three computable columns, and whatever ``os.stat`` raises when
    *filename* cannot be stat'ed.
    """
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    # The connection is closed on every exit path, including the early
    # return on a cache hit and any exception.
    try:
        conn.text_factory = str
        conn.row_factory = sqlite3.Row

        if not cache.init:
            # Create the schema once per process.
            c = conn.cursor()
            c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')
            conn.commit()
            cache.init = True
        c = conn.cursor()
        c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))
        stat = os.stat(filename)
        size = stat.st_size
        mtime = int(stat.st_mtime)
        h = None
        sha1 = None
        info = ''
        for row in c:
            if size == row['size'] and mtime == int(row['mtime']):
                value = row[type]
                if value:
                    if type == 'info':
                        value = json.loads(value)
                    return value
                # Row is current but lacks the requested column: carry the
                # other columns over into the rewritten row.
                h = row['oshash']
                sha1 = row['sha1']
                info = row['info']
        if type == 'oshash':
            value = h = oshash(filename, cached=False)
        elif type == 'sha1':
            value = sha1 = sha1sum(filename, cached=False)
        elif type == 'info':
            value = avinfo(filename, cached=False)
            info = json.dumps(value)
        else:
            # Fail before writing anything instead of crashing on an
            # unbound `value` after the row has been stored.
            raise ValueError('unknown cache type: %r' % (type, ))
        t = (filename, h, sha1, size, mtime, info)
        with conn:
            # `with conn` commits on success and rolls back on error.
            sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'
            c.execute(sql, t)
        return value
    finally:
        conn.close()
cache.init = None

def cleanup_cache():
    """Delete cache rows whose path no longer exists, then VACUUM the database.

    Operates on the sqlite file named by ``_get_file_cache()``. The
    connection is closed on exit, also when a statement fails.
    """
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    try:
        conn.text_factory = str
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute('SELECT path FROM cache')
        # Read all paths first so the cursor can be reused for the deletes.
        paths = [r[0] for r in c]
        for path in paths:
            if not os.path.exists(path):
                c.execute('DELETE FROM cache WHERE path = ?', (path, ))
        # Commit the deletes before VACUUM, as the original did.
        conn.commit()
        c.execute('VACUUM')
        conn.commit()
    finally:
        conn.close()

def sha1sum(filename, cached=False):
    """Return the hex SHA-1 digest of the contents of *filename*.

    filename -- path of the file to hash
    cached   -- if true, go through ``cache(filename, 'sha1')`` instead of
                reading the file directly

    The file is read in binary mode so the digest covers the exact bytes on
    disk, in chunks of 128 hash blocks to keep memory use bounded.
    """
    if cached:
        return cache(filename, 'sha1')
    sha1 = hashlib.sha1()
    chunk_size = 128 * sha1.block_size
    with open(filename, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            sha1.update(chunk)
    return sha1.hexdigest()

def oshash(filename, cached=True):
    """Return the 16-hex-digit OpenSubtitles-style hash of *filename*.

    os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes

    The hash is the file size plus the sum of the 8-byte integers in the
    first and the last 64k of the file, truncated to 64 bits.

    Modification for files < 64k: only the whole 8-byte words of the file are
    summed, once; trailing bytes beyond the last whole word are not read.

    filename -- path of the file to hash
    cached   -- if true, go through ``cache(filename, 'oshash')``

    Returns the string "IOError" if opening or reading the file raises
    IOError.
    """
    if cached:
        return cache(filename, 'oshash')
    try:
        # NOTE(review): 'q' has no byte-order prefix, so struct uses the
        # native byte order; kept as is so existing hashes stay valid.
        longlongformat = 'q'  # long long
        bytesize = struct.calcsize(longlongformat)

        # `with` closes the file even when a read or unpack fails.
        with open(filename, "rb") as f:
            filesize = os.path.getsize(filename)
            digest = filesize
            if filesize < 65536:
                regions = [(0, filesize // bytesize)]
            else:
                words = 65536 // bytesize
                regions = [(0, words), (filesize - 65536, words)]
            for offset, count in regions:
                f.seek(offset, 0)
                for _ in range(count):
                    (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                    # mask to remain a 64bit number
                    digest = (digest + l_value) & 0xFFFFFFFFFFFFFFFF
        return "%016x" % digest
    except IOError:
        return "IOError"
def _version_tuple(version):
    """Return the runs of digits in *version* as a tuple of ints, e.g. '0.27' -> (0, 27)."""
    return tuple(int(part) for part in re.findall(r'\d+', version))

def avinfo(filename, cached=True):
    """Return media information for *filename* as reported by ffmpeg2theora.

    filename -- path of the media file
    cached   -- if true, go through ``cache(filename, 'info')``

    Returns the decoded JSON printed by ``ffmpeg2theora --info``, adjusted as
    follows:
      * video streams that have a width but no 'display_aspect_ratio' get
        one derived from width:height, and a 'pixel_aspect_ratio' of '1:1'
      * when there is more than one audio stream, each stream gets a
        'language' key, taken from the stream's own metadata if the first
        stream has metadata, otherwise parsed from the stderr of
        ``ffmpeg -i``; the language 'und' is skipped
    For an empty file ``{'path': filename, 'size': 0}`` is returned without
    running any external program.

    Raises EnvironmentError if the installed ffmpeg2theora is older than
    0.27.
    """
    if cached:
        return cache(filename, 'info')
    if not os.path.getsize(filename):
        return {'path': filename, 'size': 0}

    ffmpeg2theora = cmd('ffmpeg2theora')
    # Without arguments ffmpeg2theora prints its version on the first line.
    p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info, error = p.communicate()
    version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]
    # Compare numerically: as strings '0.9' would sort after '0.27'. If the
    # version contains no digits at all, keep the plain string comparison.
    parsed = _version_tuple(version)
    too_old = parsed < (0, 27) if parsed else version < '0.27'
    if too_old:
        raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)

    p = subprocess.Popen([ffmpeg2theora, '--info', filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info, error = p.communicate()
    try:
        info = json.loads(info)
    except ValueError:
        #remove metadata, can be broken
        reg = re.compile('"metadata": {.*?},', re.DOTALL)
        info = re.sub(reg, '', info)
        info = json.loads(info)

    for v in info.get('video', []):
        if 'display_aspect_ratio' not in v and 'width' in v:
            v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
            v['pixel_aspect_ratio'] = '1:1'

    audio = info.get('audio', [])
    if len(audio) > 1:
        if 'metadata' in audio[0]:
            for stream in audio:
                language = stream.get('metadata', {}).get('language')
                # NOTE(review): the tag is presumably a plain string such as
                # 'eng'; a list is also accepted and its first entry used.
                # Indexing a string with [0] would keep only one character.
                if isinstance(language, (list, tuple)):
                    language = language[0] if language else None
                if language and language != 'und':
                    stream['language'] = language
        else:
            ffmpeg = cmd('ffmpeg')
            p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
            language_re = re.compile(r'\((.+?)\):')
            languages = [
                language_re.findall(l)
                for l in stderr.split('\n')
                if 'Stream' in l and 'Audio' in l
            ]
            # zip stops at the shorter list, so a stream without a matching
            # line in the ffmpeg output is left without a language.
            for stream, language in zip(audio, languages):
                if language and language[0] != 'und':
                    stream['language'] = language[0]
    return info

def ffprobe(filename):
    """Return media information for *filename* as reported by ffprobe.

    Runs ``ffprobe -show_format -show_streams -print_format json`` and
    reshapes its JSON output into a dict with these keys:
      duration, size, bitrate -- from the container ('format') section;
                                 bitrate is the reported bit_rate / 1000
      audio, video            -- lists with one dict per stream of that type
      metadata                -- container tags, updated with the tags of
                                 every stream except 'language'
      oshash                  -- ``oshash(filename)``
      path                    -- base name of *filename*
    A stream's 'language' tag is stored on the stream itself unless it is
    'und'. Video streams with a width but no 'display_aspect_ratio' get one
    derived from width:height, and a 'pixel_aspect_ratio' of '1:1'.
    """
    command = [
        cmd('ffprobe'),
        '-show_format',
        '-show_streams',
        '-print_format',
        'json',
        '-i', filename,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, _ = process.communicate()
    ffinfo = json.loads(output)

    # ffprobe key -> key used in the result; unlisted keys keep their name.
    renames = {
        'bit_rate': 'bitrate',
        'codec_name': 'codec',
        'index': 'id',
        'pix_fmt': 'pixel_format',
        'r_frame_rate': 'framerate',
        'sample_rate': 'samplerate',
    }
    stream_keys = [
        'codec_name',
        'width',
        'height',
        'bit_rate',
        'index',
        'display_aspect_ratio',
        'sample_rate',
        'channels',
    ]
    video_only_keys = [
        'sample_aspect_ratio',
        'r_frame_rate',
        'pix_fmt',
    ]

    def convert(key, value):
        # Normalise the few values that need a different type or notation.
        if key == 'r_frame_rate':
            return value.replace('/', ':')
        if key == 'bit_rate':
            return float(value) / 1000
        if key == 'duration':
            return float(value)
        if key == 'size':
            return int(value)
        return value

    container = ffinfo['format']
    info = {}
    for key in ('duration', 'size', 'bit_rate'):
        info[renames.get(key, key)] = convert(key, container[key])
    info['audio'] = []
    info['video'] = []
    info['metadata'] = container.get('tags', {})

    for s in ffinfo['streams']:
        tags = s.pop('tags', {})
        # The language belongs to the stream; all other tags are merged
        # into the file-level metadata.
        language = tags.pop('language', None)
        info['metadata'].update(tags)

        codec_type = s.get('codec_type')
        if codec_type not in ('audio', 'video'):
            continue
        stream = {}
        if language and language != 'und':
            stream['language'] = language
        wanted = stream_keys
        if codec_type == 'video':
            wanted = stream_keys + video_only_keys
        for key in wanted:
            if key in s:
                stream[renames.get(key, key)] = convert(key, s[key])
        info[codec_type].append(stream)

    for v in info['video']:
        if 'display_aspect_ratio' not in v and 'width' in v:
            v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
            v['pixel_aspect_ratio'] = '1:1'
    info['oshash'] = oshash(filename)
    info['path'] = os.path.basename(filename)
    return info

def makedirs(path):
    """Create directory *path* and any missing parents; do nothing if it exists.

    An "already exists" error from ``os.makedirs`` is ignored, which covers
    the path being created between the existence check and the call. Any
    other OSError is re-raised.
    """
    import errno
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

def copy_file(source, target, verbose=False):
    """Copy the contents of *source* to *target*, creating target's directory if needed.

    source  -- path of the file to copy
    target  -- destination path
    verbose -- if true, print a message before copying
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('copying %s to %s' % (source, target))
    write_path(target)
    shutil.copyfile(source, target)

def read_file(file, verbose=False):
    """Return the entire contents of *file*.

    file    -- path of the file to read (opened with the default mode)
    verbose -- if true, print a message before reading
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('reading %s' % (file,))
    # `with` closes the file even if read() raises.
    with open(file) as f:
        return f.read()

def read_json(file, verbose=False):
    """Return the decoded JSON document stored in *file*.

    file    -- path of the JSON file
    verbose -- if true, print a message before reading

    The document is decoded straight from the open file object.
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('reading %s' % (file,))
    with open(file) as fd:
        return json.load(fd)

def write_file(file, data, verbose=False):
    """Write *data* to *file*, creating the file's directory if needed.

    file    -- destination path; an existing file is overwritten
    data    -- content to write
    verbose -- if true, print a message before writing

    Returns ``len(data)``.
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('writing %s' % (file,))
    write_path(file)
    # `with` closes the file even if write() raises.
    with open(file, 'w') as f:
        f.write(data)
    return len(data)

def write_image(file, image, verbose=False):
    """Save *image* to *file*, creating the file's directory if needed.

    file    -- destination path
    image   -- object with a ``save(path)`` method
    verbose -- if true, print a message before writing
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('writing %s' % (file,))
    write_path(file)
    image.save(file)

def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):
    """Serialize *data* as JSON and write it to *file* via ``write_file``.

    ensure_ascii, indent and sort_keys are passed on to ``json.dumps``.
    When ensure_ascii is false the serialized text is encoded as UTF-8
    before it is written. verbose is passed on to ``write_file``.
    """
    serialized = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)
    if not ensure_ascii:
        serialized = serialized.encode('utf-8')
    write_file(file, serialized, verbose=verbose)

def write_link(source, target, verbose=False):
    """Create a symbolic link at *target* pointing to *source*.

    source  -- path the link points to
    target  -- path of the link; its directory is created if needed and an
               existing entry at that path is removed first
    verbose -- if true, print a message before linking
    """
    if verbose:
        # Single-argument print(...) gives the same output whether print is
        # a statement or a function.
        print('linking %s to %s' % (source, target))
    write_path(target)
    # lexists() is also true for a dangling symlink, which exists() reports
    # as missing; leaving one in place would make os.symlink fail.
    if os.path.lexists(target):
        os.unlink(target)
    os.symlink(source, target)

def write_path(file):
    """Make sure the directory that will contain *file* exists.

    Nothing is done when *file* has no directory component. As in
    ``makedirs``, an "already exists" error from ``os.makedirs`` is ignored
    so that a directory created between the check and the call is not an
    error; any other OSError is re-raised.
    """
    import errno
    path = os.path.split(file)[0]
    if path and not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
add some functions 2008-04-27 16:54:37 +00:00			`# -- coding: utf-8 --`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:21:21 +00:00			`# vi:si:et:sw=4:sts=4:ts=4`
move and rename some 2008-07-06 13:00:06 +00:00			`# GPL 2008`
use iter to read file with a multiple of hash block_size(sha1sum) 2013-10-14 10:35:07 +00:00			`from __future__ import division, with_statement`
add some functions 2008-04-27 16:54:37 +00:00			`import os`
use hashlib instead of sha 2009-03-16 17:15:14 +00:00			`import hashlib`
work around ffmpeg2theora problems with some metadata output 2010-11-15 16:35:07 +00:00			`import re`
saner copyfile method (that works for files > max python str len) 2012-12-26 20:26:43 +00:00			`import shutil`
deal with files < 64k in oshash 2009-06-14 19:22:47 +00:00			`import struct`
avinfo, get info about file 2009-08-07 11:35:28 +00:00			`import subprocess`
add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00			`import sqlite3`
avinfo, get info about file 2009-08-07 11:35:28 +00:00
import json/simplejson in one place and use that 2010-07-28 13:08:06 +00:00			`from ox.utils import json`
add oshash 2009-05-28 17:00:30 +00:00
ox.makedirs 2011-04-06 13:24:32 +00:00			`__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']`
add some functions 2008-04-27 16:54:37 +00:00
move EXTENSIONS to ox.file and add image type 2013-10-14 18:07:05 +00:00			`EXTENSIONS = {`
			`'audio': [`
add aiff 2014-05-17 20:24:17 +00:00			`'aac', 'aif', 'aiff',`
			`'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma'`
move EXTENSIONS to ox.file and add image type 2013-10-14 18:07:05 +00:00			`],`
			`'image': [`
			`'bmp', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp'`
			`],`
			`'subtitle': [`
			`'idx', 'srt', 'sub'`
			`],`
			`'video': [`
			`'3gp',`
			`'avi', 'divx', 'dv', 'flv', 'm2t', 'm4v', 'mkv', 'mov', 'mp4',`
add more video extensions 2014-06-04 11:04:25 +00:00			`'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'rm', 'vob', 'webm', 'wmv',`
			`'mod', 'tod', # http://en.wikipedia.org/wiki/MOD_and_TOD`
add ts to video extensions 2014-09-29 16:03:56 +00:00			`'mxf', 'ts'`
move EXTENSIONS to ox.file and add image type 2013-10-14 18:07:05 +00:00			`],`
			`}`
add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`def cmd(program):`
			`local = os.path.expanduser('~/.ox/bin/%s' % program)`
			`if os.path.exists(local):`
			`program = local`
			`return program`

add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00			`def _get_file_cache():`
			`import ox.cache`
work around thread issues with ox.cache 2014-05-17 09:25:19 +00:00			`path = ox.cache.cache_path()`
			`if path.startswith('fs:'):`
			`path = path[3:]`
			`return os.path.join(path, 'files.sqlite')`
add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00
			`def cache(filename, type='oshash'):`
			`conn = sqlite3.connect(_get_file_cache(), timeout=10)`
			`conn.text_factory = str`
			`conn.row_factory = sqlite3.Row`

			`if not cache.init:`
			`c = conn.cursor()`
			`c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')`
			`c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')`
			`c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')`
			`conn.commit()`
			`cache.init = True`
			`c = conn.cursor()`
			`c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))`
			`stat = os.stat(filename)`
			`row = None`
			`h = None`
			`sha1 = None`
			`info = ''`
			`for row in c:`
			`if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):`
			`value = row[type]`
			`if value:`
			`if type == 'info':`
			`value = json.loads(value)`
			`return value`
			`h = row['oshash']`
			`sha1 = row['sha1']`
			`info = row['info']`
			`if type == 'oshash':`
			`value = h = oshash(filename, cached=False)`
			`elif type == 'sha1':`
			`value = sha1 = sha1sum(filename, cached=False)`
			`elif type == 'info':`
			`value = avinfo(filename, cached=False)`
			`info = json.dumps(value)`
			`t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)`
			`with conn:`
			`sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'`
			`c.execute(sql, t)`
			`return value`
			`cache.init = None`

			`def cleanup_cache():`
			`conn = sqlite3.connect(_get_file_cache(), timeout=10)`
			`conn.text_factory = str`
			`conn.row_factory = sqlite3.Row`
			`c = conn.cursor()`
			`c.execute('SELECT path FROM cache')`
			`paths = [r[0] for r in c]`
			`for path in paths:`
			`if not os.path.exists(path):`
			`c.execute('DELETE FROM cache WHERE path = ?', (path, ))`
			`conn.commit()`
			`c.execute('VACUUM')`
			`conn.commit()`

			`def sha1sum(filename, cached=False):`
			`if cached:`
			`return cache(filename, 'sha1')`
use hashlib instead of sha 2009-03-16 17:15:14 +00:00			`sha1 = hashlib.sha1()`
use iter to read file with a multiple of hash block_size(sha1sum) 2013-10-14 10:35:07 +00:00			`with open(filename) as f:`
			`for chunk in iter(lambda: f.read(128*sha1.block_size), ''):`
			`sha1.update(chunk)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:21:21 +00:00			`return sha1.hexdigest()`
add some functions 2008-04-27 16:54:37 +00:00
add oshash 2009-05-28 17:00:30 +00:00			`'''`
			`os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes`
deal with files < 64k in oshash 2009-06-14 19:22:47 +00:00			`plus modification for files < 64k, buffer is filled with file data and padded with 0`
add oshash 2009-05-28 17:00:30 +00:00			`'''`
add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00			`def oshash(filename, cached=True):`
			`if cached:`
			`return cache(filename, 'oshash')`
deal with files < 64k in oshash 2009-06-14 19:22:47 +00:00			`try:`
			`longlongformat = 'q' # long long`
			`bytesize = struct.calcsize(longlongformat)`
add oshash 2009-05-28 17:00:30 +00:00
deal with files < 64k in oshash 2009-06-14 19:22:47 +00:00			`f = open(filename, "rb")`

			`filesize = os.path.getsize(filename)`
			`hash = filesize`
			`if filesize < 65536:`
			`for x in range(int(filesize/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number`
			`else:`
			`for x in range(int(65536/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number`
			`f.seek(max(0,filesize-65536),0)`
			`for x in range(int(65536/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF`
			`f.close()`
			`returnedhash = "%016x" % hash`
			`return returnedhash`
			`except(IOError):`
add oshash 2009-05-28 17:00:30 +00:00			`return "IOError"`
add some functions 2008-04-27 16:54:37 +00:00
add cache for file functions: oshash/sha1sum/avinfo 2012-08-21 07:35:37 +00:00			`def avinfo(filename, cached=True):`
			`if cached:`
			`return cache(filename, 'info')`
avinfo should not fail for emptpy files 2010-02-27 11:47:13 +00:00			`if os.path.getsize(filename):`
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`ffmpeg2theora = cmd('ffmpeg2theora')`
fail if ffmpeg2theora is not up to date 2012-03-21 22:45:19 +00:00			`p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`info, error = p.communicate()`
dont fail with f2t 0.27 2012-03-23 22:00:54 +00:00			`version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]`
fail if ffmpeg2theora is not up to date 2012-03-21 22:45:19 +00:00			`if version < '0.27':`
			`raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)`
use local ffmpeg2theora if available 2012-02-24 17:07:55 +00:00			`p = subprocess.Popen([ffmpeg2theora, '--info', filename],`
f2t sometimes does not list display aspect ratio, set it to width/height in that case 2011-03-26 11:12:56 +00:00			`stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
avinfo should not fail for emptpy files 2010-02-27 11:47:13 +00:00			`info, error = p.communicate()`
work around ffmpeg2theora problems with some metadata output 2010-11-15 16:35:07 +00:00			`try:`
			`info = json.loads(info)`
			`except:`
			`#remove metadata, can be broken`
			`reg = re.compile('"metadata": {.*?},', re.DOTALL)`
			`info = re.sub(reg, '', info)`
			`info = json.loads(info)`
f2t sometimes does not list display aspect ratio, set it to width/height in that case 2011-03-26 11:12:56 +00:00			`if 'video' in info:`
			`for v in info['video']:`
more format workarounds in avinfo 2011-04-19 13:42:04 +00:00			`if not 'display_aspect_ratio' in v and 'width' in v:`
f2t sometimes does not list display aspect ratio, set it to width/height in that case 2011-03-26 11:12:56 +00:00			`v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])`
			`v['pixel_aspect_ratio'] = '1:1'`
parse language from audio track if video has multiple audio tracks 2014-07-20 09:35:55 +00:00			`if len(info.get('audio', [])) > 1:`
use metadata from ffmpeg2theora if available 2014-07-20 10:54:13 +00:00			`if 'metadata' in info['audio'][0]:`
			`for stream in info['audio']:`
			`language = stream.get('metadata', {}).get('language')`
			`if language and language != 'und':`
			`stream['language'] = language[0]`
			`else:`
			`ffmpeg = cmd('ffmpeg')`
			`p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`stdout, stderr = p.communicate()`
			`languages = [re.compile('\((.+?)\):').findall(l) for l in stderr.split('\n') if 'Stream' in l and 'Audio' in l]`
			`for i, stream in enumerate(info['audio']):`
			`language = languages[i]`
			`if language and language[0] != 'und':`
			`stream['language'] = language[0]`
work around ffmpeg2theora problems with some metadata output 2010-11-15 16:35:07 +00:00			`return info`

avinfo should not fail for emptpy files 2010-02-27 11:47:13 +00:00			`return {'path': filename, 'size': 0}`
ox.makedirs 2011-04-06 13:24:32 +00:00
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`def ffprobe(filename):`
			`p = subprocess.Popen([`
			`cmd('ffprobe'),`
			`'-show_format',`
			`'-show_streams',`
			`'-print_format',`
			`'json',`
			`'-i', filename`

			`], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`info, error = p.communicate()`
			`ffinfo = json.loads(info)`

			`def fix_value(key, value):`
			`if key == 'r_frame_rate':`
			`value = value.replace('/', ':')`
			`elif key == 'bit_rate':`
			`value = float(value) / 1000`
			`elif key == 'duration':`
			`value = float(value)`
			`elif key == 'size':`
			`value = int(value)`
			`return value`

			`info = {}`
			`for key in ('duration', 'size', 'bit_rate'):`
			`info[{`
			`'bit_rate': 'bitrate'`
			`}.get(key, key)] = fix_value(key, ffinfo['format'][key])`
			`info['audio'] = []`
			`info['video'] = []`
			`info['metadata'] = ffinfo['format'].get('tags', {})`
			`for s in ffinfo['streams']:`
			`tags = s.pop('tags', {})`
map track language to track 2014-07-20 09:20:43 +00:00			`language = None`
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`for t in tags:`
map track language to track 2014-07-20 09:20:43 +00:00			`if t == 'language':`
			`language = tags[t]`
			`else:`
			`info['metadata'][t] = tags[t]`
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`if s.get('codec_type') in ('audio', 'video'):`
			`stream = {}`
map track language to track 2014-07-20 09:20:43 +00:00			`if language and language != 'und':`
			`stream['language'] = language`
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`keys = [`
			`'codec_name',`
			`'width',`
			`'height',`
			`'bit_rate',`
			`'index',`
			`'display_aspect_ratio',`
			`'sample_rate',`
			`'channels',`
			`]`
			`if s['codec_type'] == 'video':`
			`keys += [`
			`'sample_aspect_ratio',`
			`'r_frame_rate',`
			`'pix_fmt',`
			`]`

			`for key in keys:`
			`if key in s:`
			`stream[{`
			`'codec_name': 'codec',`
			`'bit_rate': 'bitrate',`
			`'index': 'id',`
			`'r_frame_rate': 'framerate',`
			`'sample_rate': 'samplerate',`
			`'pix_fmt': 'pixel_format',`
			`}.get(key, key)] = fix_value(key, s[key])`
			`info[s['codec_type']].append(stream)`
			`else:`
			`pass`
			`#print s`
			`for v in info['video']:`
			`if not 'display_aspect_ratio' in v and 'width' in v:`
			`v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])`
			`v['pixel_aspect_ratio'] = '1:1'`
use iter to read file with a multiple of hash block_size(sha1sum) 2013-10-14 10:35:07 +00:00			`info['oshash'] = oshash(filename)`
add ffprobe wrapper 2012-06-11 10:11:06 +00:00			`info['path'] = os.path.basename(filename)`
			`return info`

ox.makedirs 2011-04-06 13:24:32 +00:00			`def makedirs(path):`
			`if not os.path.exists(path):`
			`try:`
			`os.makedirs(path)`
			`except OSError, e:`
			`if e.errno != 17:`
			`raise`
update file module, add get_sort_name and get_sort_title to text module 2011-10-11 14:14:29 +00:00
			`def copy_file(source, target, verbose=False):`
			`if verbose:`
			`print 'copying', source, 'to', target`
saner copyfile method (that works for files > max python str len) 2012-12-26 20:26:43 +00:00			`write_path(target)`
			`shutil.copyfile(source, target)`
update file module, add get_sort_name and get_sort_title to text module 2011-10-11 14:14:29 +00:00
			`def read_file(file, verbose=False):`
			`if verbose:`
			`print 'reading', file`
			`f = open(file)`
			`data = f.read()`
			`f.close()`
			`return data`

			`def read_json(file, verbose=False):`
no need to load json string into ram 2013-11-15 15:16:21 +00:00			`if verbose:`
			`print 'reading', file`
			`with open(file) as fd:`
			`data = json.load(fd)`
			`return data`
update file module, add get_sort_name and get_sort_title to text module 2011-10-11 14:14:29 +00:00
			`def write_file(file, data, verbose=False):`
			`if verbose:`
			`print 'writing', file`
			`write_path(file)`
			`f = open(file, 'w')`
			`f.write(data)`
			`f.close()`
			`return len(data)`

ox.file: add ensure_ascii parameter to write_json; add write_image method (write_path + image.save) 2013-06-09 14:45:26 +00:00			`def write_image(file, image, verbose=False):`
			`if verbose:`
			`print 'writing', file`
			`write_path(file)`
			`image.save(file)`

			`def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):`
			`data = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)`
			`write_file(file, data if ensure_ascii else data.encode('utf-8'), verbose=verbose)`
update file module, add get_sort_name and get_sort_title to text module 2011-10-11 14:14:29 +00:00
			`def write_link(source, target, verbose=False):`
			`if verbose:`
			`print 'linking', source, 'to', target`
			`write_path(target)`
			`if os.path.exists(target):`
			`os.unlink(target)`
			`os.symlink(source, target)`

			`def write_path(file):`
			`path = os.path.split(file)[0]`
			`if path and not os.path.exists(path):`
			`os.makedirs(path)`