openmedialibrary_platform/Shared/lib/python2.7/site-packages/ox/file.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
from __future__ import division, with_statement
import errno
import hashlib
import os
import re
import shutil
import sqlite3
import struct
import subprocess

from ox.utils import json

__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']

# Known file extensions (lower case, no leading dot), grouped by media kind.
EXTENSIONS = {
    'audio': [
        'aac',
        'aif',
        'aiff',
        'flac',
        'm4a',
        'mp3',
        'oga',
        'ogg',
        'wav',
        'wma',
    ],
    'image': [
        'bmp',
        'gif',
        'jpeg',
        'jpg',
        'png',
        'svg',
        'webp',
    ],
    'subtitle': [
        'idx',
        'srt',
        'sub',
    ],
    'video': [
        '3gp',
        'avi',
        'divx',
        'dv',
        'flv',
        'm2t',
        'm4v',
        'mkv',
        'mov',
        'mp4',
        'mpeg',
        'mpg',
        'mts',
        'ogm',
        'ogv',
        'rm',
        'vob',
        'webm',
        'wmv',
        # camcorder formats, see http://en.wikipedia.org/wiki/MOD_and_TOD
        'mod',
        'tod',
        'mxf',
    ],
}

def cmd(program):
    '''
        Return the command to run for program.

        If a file named program exists in ~/.ox/bin its expanded path is
        returned, otherwise program is returned unchanged.
    '''
    candidate = os.path.expanduser('~/.ox/bin/%s' % program)
    return candidate if os.path.exists(candidate) else program

def _get_file_cache():
    '''
        Return the path of the files.sqlite database inside the directory
        reported by ox.cache.cache_path(), with a leading 'fs:' removed.
    '''
    # imported here rather than at module level, as in the original code
    import ox.cache
    prefix = 'fs:'
    base = ox.cache.cache_path()
    if base.startswith(prefix):
        base = base[len(prefix):]
    return os.path.join(base, 'files.sqlite')

def cache(filename, type='oshash'):
    '''
        Return the oshash, sha1 or info of filename, using the sqlite file
        cache.

        type is one of 'oshash', 'sha1' or 'info'.

        A cached row is only used if its size and (integer) mtime match the
        current os.stat() of filename. If the requested value is missing or
        the row is stale, the value is computed with oshash(), sha1sum() or
        avinfo() and the row for filename is replaced. Values of the other
        types found in a still valid row are kept.

        info is stored as JSON text and returned decoded.
        Raises OSError if filename can not be stat'ed.
    '''
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    # make sure the connection is closed on every exit path, including the
    # early return on a cache hit and any exception
    try:
        conn.text_factory = str
        conn.row_factory = sqlite3.Row

        if not cache.init:
            # create table and indexes once per process
            c = conn.cursor()
            c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')
            conn.commit()
            cache.init = True
        c = conn.cursor()
        c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))
        stat = os.stat(filename)
        h = None
        sha1 = None
        info = ''
        for row in c:
            if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):
                value = row[type]
                if value:
                    if type == 'info':
                        value = json.loads(value)
                    return value
                # row is valid but lacks the requested value,
                # keep the other columns for the rewrite below
                h = row['oshash']
                sha1 = row['sha1']
                info = row['info']
        if type == 'oshash':
            value = h = oshash(filename, cached=False)
        elif type == 'sha1':
            value = sha1 = sha1sum(filename, cached=False)
        elif type == 'info':
            value = avinfo(filename, cached=False)
            info = json.dumps(value)
        t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)
        # "with conn" commits on success and rolls back on error
        with conn:
            sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'
            c.execute(sql, t)
        return value
    finally:
        conn.close()
cache.init = None

def cleanup_cache():
    '''
        Remove cache rows whose path no longer exists on disk and VACUUM
        the cache database afterwards.
    '''
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    # close the connection even if a statement fails
    try:
        conn.text_factory = str
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute('SELECT path FROM cache')
        # collect all paths first, the cursor is reused for the deletes
        paths = [r[0] for r in c]
        for path in paths:
            if not os.path.exists(path):
                c.execute('DELETE FROM cache WHERE path = ?', (path, ))
        conn.commit()
        c.execute('VACUUM')
        conn.commit()
    finally:
        conn.close()

def sha1sum(filename, cached=False):
    '''
        Return the hex encoded sha1 digest of the content of filename.

        If cached is true the value is looked up (and stored) via cache().
    '''
    if cached:
        return cache(filename, 'sha1')
    sha1 = hashlib.sha1()
    # read in binary mode so that the bytes on disk are hashed unmodified,
    # text mode alters or truncates binary content on some platforms
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(128*sha1.block_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()

def _oshash_sum(f, count, longlongformat='q'):
    '''
        Read count words of format longlongformat from the current position
        of f and return their (signed) sum.
    '''
    data = f.read(count * struct.calcsize(longlongformat))
    # raises struct.error if fewer bytes than expected could be read
    return sum(struct.unpack('%d%s' % (count, longlongformat), data))

def oshash(filename, cached=True):
    '''
        os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes

        The hash is the file size plus the sum of the first and the last
        64k of the file, read as signed 'q' (long long, native byte order)
        words, truncated to 64 bit and formatted as 16 hex digits.

        Modification for files < 64k: only the whole words the file
        contains are summed (once), trailing bytes that do not fill a
        whole word are ignored.

        If cached is true the value is looked up (and stored) via cache().
        Returns the string "IOError" if the file can not be opened or read.
    '''
    if cached:
        return cache(filename, 'oshash')
    chunksize = 65536
    bytesize = struct.calcsize('q')
    try:
        # "with" closes the file even if reading or unpacking fails
        with open(filename, "rb") as f:
            filesize = os.path.getsize(filename)
            if filesize < chunksize:
                total = _oshash_sum(f, filesize // bytesize)
            else:
                total = _oshash_sum(f, chunksize // bytesize)
                f.seek(max(0, filesize - chunksize), 0)
                total += _oshash_sum(f, chunksize // bytesize)
    except IOError:
        return "IOError"
    # masking once at the end equals masking after each addition
    return "%016x" % ((filesize + total) & 0xFFFFFFFFFFFFFFFF)

def avinfo(filename, cached=True):
    '''
        Return information about the media file filename as reported by
        "ffmpeg2theora --info".

        If cached is true the value is looked up (and stored) via cache().
        Empty files are not probed, {'path': filename, 'size': 0} is
        returned for them.
        Video streams that have a width but no display_aspect_ratio get
        width:height as display_aspect_ratio and 1:1 as pixel_aspect_ratio.

        Raises EnvironmentError if ffmpeg2theora is older than 0.27.
    '''
    if cached:
        return cache(filename, 'info')
    if not os.path.getsize(filename):
        return {'path': filename, 'size': 0}

    ffmpeg2theora = cmd('ffmpeg2theora')
    # called without arguments, the version is taken from the first line
    p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info, error = p.communicate()
    version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]
    # compare numerically, as strings '0.9' would be newer than '0.27'
    # and '0.100' older; without any digits fall back to string comparison
    numbers = tuple(int(n) for n in re.findall(r'\d+', version))
    if numbers:
        outdated = numbers < (0, 27)
    else:
        outdated = version < '0.27'
    if outdated:
        raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)

    p = subprocess.Popen([ffmpeg2theora, '--info', filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info, error = p.communicate()
    try:
        info = json.loads(info)
    except ValueError:
        #remove metadata, can be broken
        reg = re.compile('"metadata": {.*?},', re.DOTALL)
        info = re.sub(reg, '', info)
        info = json.loads(info)
    if 'video' in info:
        for v in info['video']:
            if not 'display_aspect_ratio' in v and 'width' in v:
                v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
                v['pixel_aspect_ratio'] = '1:1'
    return info

def ffprobe(filename):
    '''
        Probe filename with ffprobe and return a dict with duration, size
        and bitrate of the container, lists of audio and video streams,
        the merged format and stream tags as metadata, the oshash and the
        basename of filename as path.
    '''
    # ffprobe key -> key used in the result, other keys are kept as they are
    renamed = {
        'codec_name': 'codec',
        'bit_rate': 'bitrate',
        'index': 'id',
        'r_frame_rate': 'framerate',
        'sample_rate': 'samplerate',
        'pix_fmt': 'pixel_format',
    }
    common_keys = [
        'codec_name',
        'width',
        'height',
        'bit_rate',
        'index',
        'display_aspect_ratio',
        'sample_rate',
        'channels',
    ]
    video_keys = [
        'sample_aspect_ratio',
        'r_frame_rate',
        'pix_fmt',
    ]

    def convert(key, value):
        # 25/1 -> 25:1, bit rates are divided by 1000,
        # duration and size become numbers
        if key == 'r_frame_rate':
            return value.replace('/', ':')
        if key == 'bit_rate':
            return float(value) / 1000
        if key == 'duration':
            return float(value)
        if key == 'size':
            return int(value)
        return value

    command = [
        cmd('ffprobe'),
        '-show_format',
        '-show_streams',
        '-print_format',
        'json',
        '-i', filename
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    probed = json.loads(output)
    container = probed['format']

    info = {}
    for key in ('duration', 'size', 'bit_rate'):
        info[renamed.get(key, key)] = convert(key, container[key])
    info['audio'] = []
    info['video'] = []
    info['metadata'] = container.get('tags', {})

    for s in probed['streams']:
        # tags of all streams end up in metadata, later ones win
        info['metadata'].update(s.pop('tags', {}))
        kind = s.get('codec_type')
        if kind not in ('audio', 'video'):
            continue
        wanted = (common_keys + video_keys) if kind == 'video' else common_keys
        stream = dict(
            (renamed.get(key, key), convert(key, s[key]))
            for key in wanted if key in s
        )
        info[kind].append(stream)

    for v in info['video']:
        if 'width' in v and 'display_aspect_ratio' not in v:
            v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
            v['pixel_aspect_ratio'] = '1:1'
    info['oshash'] = oshash(filename)
    info['path'] = os.path.basename(filename)
    return info

def makedirs(path):
    '''
        Create the directory path including missing parents.

        It is not an error if path already exists, also not if it gets
        created by someone else between the check and os.makedirs().
        Other OSErrors are raised.
    '''
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

def copy_file(source, target, verbose=False):
    '''
        Copy the content of source to target, the folder of target is
        created if needed. If verbose is true a message is printed.
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('copying %s to %s' % (source, target))
    write_path(target)
    shutil.copyfile(source, target)

def read_file(file, verbose=False):
    '''
        Return the content of file. If verbose is true a message is printed.
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('reading %s' % (file, ))
    # "with" closes the file even if reading fails
    with open(file) as f:
        data = f.read()
    return data

def read_json(file, verbose=False):
    '''
        Return the decoded JSON content of file.
        If verbose is true a message is printed.
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('reading %s' % (file, ))
    with open(file) as fd:
        data = json.load(fd)
    return data

def write_file(file, data, verbose=False):
    '''
        Write data to file, the folder of file is created if needed.
        If verbose is true a message is printed.

        Returns len(data).
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('writing %s' % (file, ))
    write_path(file)
    # "with" closes the file even if writing fails
    with open(file, 'w') as f:
        f.write(data)
    return len(data)

def write_image(file, image, verbose=False):
    '''
        Save image to file by calling image.save(file), the folder of file
        is created if needed. If verbose is true a message is printed.
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('writing %s' % (file, ))
    write_path(file)
    image.save(file)

def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):
    '''
        Encode data as JSON and write it to file with write_file().

        ensure_ascii, indent and sort_keys are passed on to json.dumps(),
        if ensure_ascii is false the result is encoded as utf-8 first.
    '''
    encoded = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)
    if not ensure_ascii:
        encoded = encoded.encode('utf-8')
    write_file(file, encoded, verbose=verbose)

def write_link(source, target, verbose=False):
    '''
        Create a symbolic link target pointing to source, replacing an
        existing target. The folder of target is created if needed.
        If verbose is true a message is printed.
    '''
    if verbose:
        # one formatted string prints the same with the print statement
        # and the print function
        print('linking %s to %s' % (source, target))
    write_path(target)
    # lexists is also true for a dangling symlink, os.path.exists is not
    # and os.symlink would then fail as target is still there
    if os.path.lexists(target):
        os.unlink(target)
    os.symlink(source, target)

def write_path(file):
    '''
        Create the folder that contains file, including missing parents.
        Nothing is done if file has no folder part or the folder exists.
    '''
    path = os.path.split(file)[0]
    if path and not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            # folder got created between the check and os.makedirs()
            if e.errno != errno.EEXIST:
                raise
Open Media Library Platform 2013-10-11 17:28:32 +00:00			`# -- coding: utf-8 --`
			`# vi:si:et:sw=4:sts=4:ts=4`
			`# GPL 2008`
			`from __future__ import division, with_statement`
			`import os`
			`import hashlib`
			`import re`
			`import shutil`
			`import struct`
			`import subprocess`
			`import sqlite3`

			`from ox.utils import json`

			`__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']`

			`EXTENSIONS = {`
			`'audio': [`
update to tornado 4.0 and requests 2.3.0 2014-08-12 08:44:01 +00:00			`'aac', 'aif', 'aiff',`
			`'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma'`
Open Media Library Platform 2013-10-11 17:28:32 +00:00			`],`
			`'image': [`
			`'bmp', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp'`
			`],`
			`'subtitle': [`
			`'idx', 'srt', 'sub'`
			`],`
			`'video': [`
			`'3gp',`
			`'avi', 'divx', 'dv', 'flv', 'm2t', 'm4v', 'mkv', 'mov', 'mp4',`
update to tornado 4.0 and requests 2.3.0 2014-08-12 08:44:01 +00:00			`'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'rm', 'vob', 'webm', 'wmv',`
			`'mod', 'tod', # http://en.wikipedia.org/wiki/MOD_and_TOD`
			`'mxf'`
Open Media Library Platform 2013-10-11 17:28:32 +00:00			`],`
			`}`

			`def cmd(program):`
			`local = os.path.expanduser('~/.ox/bin/%s' % program)`
			`if os.path.exists(local):`
			`program = local`
			`return program`

			`def _get_file_cache():`
			`import ox.cache`
update to tornado 4.0 and requests 2.3.0 2014-08-12 08:44:01 +00:00			`path = ox.cache.cache_path()`
			`if path.startswith('fs:'):`
			`path = path[3:]`
			`return os.path.join(path, 'files.sqlite')`
Open Media Library Platform 2013-10-11 17:28:32 +00:00
			`def cache(filename, type='oshash'):`
			`conn = sqlite3.connect(_get_file_cache(), timeout=10)`
			`conn.text_factory = str`
			`conn.row_factory = sqlite3.Row`

			`if not cache.init:`
			`c = conn.cursor()`
			`c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')`
			`c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')`
			`c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')`
			`conn.commit()`
			`cache.init = True`
			`c = conn.cursor()`
			`c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))`
			`stat = os.stat(filename)`
			`row = None`
			`h = None`
			`sha1 = None`
			`info = ''`
			`for row in c:`
			`if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):`
			`value = row[type]`
			`if value:`
			`if type == 'info':`
			`value = json.loads(value)`
			`return value`
			`h = row['oshash']`
			`sha1 = row['sha1']`
			`info = row['info']`
			`if type == 'oshash':`
			`value = h = oshash(filename, cached=False)`
			`elif type == 'sha1':`
			`value = sha1 = sha1sum(filename, cached=False)`
			`elif type == 'info':`
			`value = avinfo(filename, cached=False)`
			`info = json.dumps(value)`
			`t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)`
			`with conn:`
			`sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'`
			`c.execute(sql, t)`
			`return value`
			`cache.init = None`

			`def cleanup_cache():`
			`conn = sqlite3.connect(_get_file_cache(), timeout=10)`
			`conn.text_factory = str`
			`conn.row_factory = sqlite3.Row`
			`c = conn.cursor()`
			`c.execute('SELECT path FROM cache')`
			`paths = [r[0] for r in c]`
			`for path in paths:`
			`if not os.path.exists(path):`
			`c.execute('DELETE FROM cache WHERE path = ?', (path, ))`
			`conn.commit()`
			`c.execute('VACUUM')`
			`conn.commit()`

			`def sha1sum(filename, cached=False):`
			`if cached:`
			`return cache(filename, 'sha1')`
			`sha1 = hashlib.sha1()`
			`with open(filename) as f:`
			`for chunk in iter(lambda: f.read(128*sha1.block_size), ''):`
			`sha1.update(chunk)`
			`return sha1.hexdigest()`

			`'''`
			`os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes`
			`plus modification for files < 64k, buffer is filled with file data and padded with 0`
			`'''`
			`def oshash(filename, cached=True):`
			`if cached:`
			`return cache(filename, 'oshash')`
			`try:`
			`longlongformat = 'q' # long long`
			`bytesize = struct.calcsize(longlongformat)`

			`f = open(filename, "rb")`

			`filesize = os.path.getsize(filename)`
			`hash = filesize`
			`if filesize < 65536:`
			`for x in range(int(filesize/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number`
			`else:`
			`for x in range(int(65536/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number`
			`f.seek(max(0,filesize-65536),0)`
			`for x in range(int(65536/bytesize)):`
			`buffer = f.read(bytesize)`
			`(l_value,)= struct.unpack(longlongformat, buffer)`
			`hash += l_value`
			`hash = hash & 0xFFFFFFFFFFFFFFFF`
			`f.close()`
			`returnedhash = "%016x" % hash`
			`return returnedhash`
			`except(IOError):`
			`return "IOError"`

			`def avinfo(filename, cached=True):`
			`if cached:`
			`return cache(filename, 'info')`
			`if os.path.getsize(filename):`
			`ffmpeg2theora = cmd('ffmpeg2theora')`
			`p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`info, error = p.communicate()`
			`version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]`
			`if version < '0.27':`
			`raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)`
			`p = subprocess.Popen([ffmpeg2theora, '--info', filename],`
			`stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`info, error = p.communicate()`
			`try:`
			`info = json.loads(info)`
			`except:`
			`#remove metadata, can be broken`
			`reg = re.compile('"metadata": {.*?},', re.DOTALL)`
			`info = re.sub(reg, '', info)`
			`info = json.loads(info)`
			`if 'video' in info:`
			`for v in info['video']:`
			`if not 'display_aspect_ratio' in v and 'width' in v:`
			`v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])`
			`v['pixel_aspect_ratio'] = '1:1'`
			`return info`

			`return {'path': filename, 'size': 0}`

			`def ffprobe(filename):`
			`p = subprocess.Popen([`
			`cmd('ffprobe'),`
			`'-show_format',`
			`'-show_streams',`
			`'-print_format',`
			`'json',`
			`'-i', filename`

			`], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`info, error = p.communicate()`
			`ffinfo = json.loads(info)`

			`def fix_value(key, value):`
			`if key == 'r_frame_rate':`
			`value = value.replace('/', ':')`
			`elif key == 'bit_rate':`
			`value = float(value) / 1000`
			`elif key == 'duration':`
			`value = float(value)`
			`elif key == 'size':`
			`value = int(value)`
			`return value`

			`info = {}`
			`for key in ('duration', 'size', 'bit_rate'):`
			`info[{`
			`'bit_rate': 'bitrate'`
			`}.get(key, key)] = fix_value(key, ffinfo['format'][key])`
			`info['audio'] = []`
			`info['video'] = []`
			`info['metadata'] = ffinfo['format'].get('tags', {})`
			`for s in ffinfo['streams']:`
			`tags = s.pop('tags', {})`
			`for t in tags:`
			`info['metadata'][t] = tags[t]`
			`if s.get('codec_type') in ('audio', 'video'):`
			`stream = {}`
			`keys = [`
			`'codec_name',`
			`'width',`
			`'height',`
			`'bit_rate',`
			`'index',`
			`'display_aspect_ratio',`
			`'sample_rate',`
			`'channels',`
			`]`
			`if s['codec_type'] == 'video':`
			`keys += [`
			`'sample_aspect_ratio',`
			`'r_frame_rate',`
			`'pix_fmt',`
			`]`

			`for key in keys:`
			`if key in s:`
			`stream[{`
			`'codec_name': 'codec',`
			`'bit_rate': 'bitrate',`
			`'index': 'id',`
			`'r_frame_rate': 'framerate',`
			`'sample_rate': 'samplerate',`
			`'pix_fmt': 'pixel_format',`
			`}.get(key, key)] = fix_value(key, s[key])`
			`info[s['codec_type']].append(stream)`
			`else:`
			`pass`
			`#print s`
			`for v in info['video']:`
			`if not 'display_aspect_ratio' in v and 'width' in v:`
			`v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])`
			`v['pixel_aspect_ratio'] = '1:1'`
			`info['oshash'] = oshash(filename)`
			`info['path'] = os.path.basename(filename)`
			`return info`

			`def makedirs(path):`
			`if not os.path.exists(path):`
			`try:`
			`os.makedirs(path)`
			`except OSError, e:`
			`if e.errno != 17:`
			`raise`

			`def copy_file(source, target, verbose=False):`
			`if verbose:`
			`print 'copying', source, 'to', target`
			`write_path(target)`
			`shutil.copyfile(source, target)`

			`def read_file(file, verbose=False):`
			`if verbose:`
			`print 'reading', file`
			`f = open(file)`
			`data = f.read()`
			`f.close()`
			`return data`

			`def read_json(file, verbose=False):`
			`if verbose:`
			`print 'reading', file`
			`with open(file) as fd:`
			`data = json.load(fd)`
			`return data`

			`def write_file(file, data, verbose=False):`
			`if verbose:`
			`print 'writing', file`
			`write_path(file)`
			`f = open(file, 'w')`
			`f.write(data)`
			`f.close()`
			`return len(data)`

			`def write_image(file, image, verbose=False):`
			`if verbose:`
			`print 'writing', file`
			`write_path(file)`
			`image.save(file)`

			`def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):`
			`data = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)`
			`write_file(file, data if ensure_ascii else data.encode('utf-8'), verbose=verbose)`

			`def write_link(source, target, verbose=False):`
			`if verbose:`
			`print 'linking', source, 'to', target`
			`write_path(target)`
			`if os.path.exists(target):`
			`os.unlink(target)`
			`os.symlink(source, target)`

			`def write_path(file):`
			`path = os.path.split(file)[0]`
			`if path and not os.path.exists(path):`
			`os.makedirs(path)`