# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
"""File helpers: hashing, media info, an sqlite-backed cache and small read/write utilities."""
from __future__ import division, with_statement, print_function

import errno
import hashlib
import os
import re
import shutil
import sqlite3
import struct
import subprocess

try:
    # distutils was removed from the standard library in Python 3.12;
    # shutil.which is the supported replacement on Python 3.3+.
    from shutil import which as find_executable
except ImportError:
    # Python 2 has no shutil.which.
    from distutils.spawn import find_executable

from .utils import json

__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']

EXTENSIONS = {
    'audio': [
        'aac', 'aif', 'aiff', 'amr',
        'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma'
    ],
    'image': [
        'bmp', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp'
    ],
    'subtitle': [
        'idx', 'srt', 'sub', 'vtt'
    ],
    'video': [
        '3gp',
        'avi', 'divx', 'dv', 'flv', 'm2t', 'm4v', 'mkv', 'mov', 'mp4',
        'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'rm', 'rmvb', 'vob', 'webm', 'wmv',
        'mod', 'tod',  # http://en.wikipedia.org/wiki/MOD_and_TOD
        'mxf', 'ts'
    ],
}

# Column names that cache() can look up or compute.
_CACHE_TYPES = ('oshash', 'sha1', 'info')

# Matches the parenthesised language code in ffmpeg's "Stream ... Audio" lines.
_STREAM_LANGUAGE_RE = re.compile(r'\((.+?)\):')

# ffprobe key -> key used in the dictionaries returned by ffprobe().
_FFPROBE_KEY_NAMES = {
    'codec_name': 'codec',
    'bit_rate': 'bitrate',
    'index': 'id',
    'r_frame_rate': 'framerate',
    'sample_rate': 'samplerate',
    'pix_fmt': 'pixel_format',
    'sample_aspect_ratio': 'pixel_aspect_ratio',
}

# Stream keys copied for both audio and video streams.
_FFPROBE_STREAM_KEYS = [
    'codec_name',
    'width',
    'height',
    'bit_rate',
    'index',
    'display_aspect_ratio',
    'sample_rate',
    'channels',
]

# Additional stream keys copied for video streams only.
_FFPROBE_VIDEO_KEYS = [
    'sample_aspect_ratio',
    'r_frame_rate',
    'pix_fmt',
]


def cmd(program):
    """Return the command to run for *program*.

    A copy in ``~/.ox/bin`` takes precedence when it exists; otherwise
    *program* is returned unchanged.
    """
    local = os.path.expanduser('~/.ox/bin/%s' % program)
    if os.path.exists(local):
        program = local
    return program


def _get_file_cache():
    """Return the path of the ``files.sqlite`` database inside the ox cache path."""
    import ox.cache
    path = ox.cache.cache_path()
    if path.startswith('fs:'):
        path = path[3:]
    return os.path.join(path, 'files.sqlite')


def cache(filename, type='oshash', update=False):
    """Return the cached *type* value for *filename*, computing it if needed.

    *type* is one of ``'oshash'``, ``'sha1'`` or ``'info'``. A cached value
    is only used when the stored size and mtime still match the file; with
    ``update=True`` the value is always recomputed. The (re)computed value is
    written back to the cache before it is returned.

    Raises ValueError for an unsupported *type*.
    """
    if type not in _CACHE_TYPES:
        raise ValueError('unknown cache type: %r' % (type,))
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    try:
        conn.row_factory = sqlite3.Row

        if not cache.init:
            # create the table and its indexes once per process
            c = conn.cursor()
            c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')
            c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')
            conn.commit()
            cache.init = True

        c = conn.cursor()
        c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))
        stat = os.stat(filename)
        h = None
        sha1 = None
        info = ''
        # path is unique, so there is at most one row
        row = c.fetchone()
        if row is not None \
                and stat.st_size == row['size'] \
                and int(stat.st_mtime) == int(row['mtime']):
            if not update:
                value = row[type]
                if value:
                    if type == 'info':
                        value = json.loads(value)
                    return value
            # file is unchanged: keep the other cached columns when
            # the row is rewritten below
            h = row['oshash']
            sha1 = row['sha1']
            info = row['info']

        if type == 'oshash':
            value = h = oshash(filename, cached=False)
        elif type == 'sha1':
            value = sha1 = sha1sum(filename, cached=False)
        else:
            value = avinfo(filename, cached=False)
            info = json.dumps(value)
        t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)
        with conn:
            sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'
            c.execute(sql, t)
        return value
    finally:
        # "with conn" only commits or rolls back; it does not close
        conn.close()


cache.init = None


def cleanup_cache():
    """Delete cache rows whose path no longer exists, then VACUUM the database."""
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    try:
        c = conn.cursor()
        c.execute('SELECT path FROM cache')
        missing = [(r[0], ) for r in c.fetchall() if not os.path.exists(r[0])]
        c.executemany('DELETE FROM cache WHERE path = ?', missing)
        # commit before VACUUM so it does not run inside the open transaction
        conn.commit()
        c.execute('VACUUM')
        conn.commit()
    finally:
        conn.close()


def sha1sum(filename, cached=False):
    """Return the hexadecimal SHA-1 digest of *filename*.

    With ``cached=True`` the value is looked up through cache().
    """
    if cached:
        return cache(filename, 'sha1')
    sha1 = hashlib.sha1()
    with open(filename, 'rb') as f:
        # read in chunks so large files are not loaded into memory at once
        for chunk in iter(lambda: f.read(128*sha1.block_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()


def _sum_words(f, count, fmt, size):
    """Read *count* values of struct format *fmt* (*size* bytes each) from *f* and return their sum."""
    total = 0
    for _ in range(count):
        (value,) = struct.unpack(fmt, f.read(size))
        total += value
    return total


def oshash(filename, cached=True):
    """Return the os hash of *filename* as a 16 digit hexadecimal string.

    os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    plus modification for files < 64k, buffer is filled with file data and padded with 0

    With ``cached=True`` the value is looked up through cache().
    Returns the string ``"IOError"`` when the file cannot be read.
    """
    if cached:
        return cache(filename, 'oshash')
    try:
        # NOTE(review): 'q' uses the platform's native byte order; the
        # reference implementation linked above reads little-endian values.
        longlongformat = 'q'  # long long
        bytesize = struct.calcsize(longlongformat)
        with open(filename, 'rb') as f:
            filesize = os.path.getsize(filename)
            value = filesize
            if filesize < 65536:
                # only complete words are summed for small files
                value += _sum_words(f, filesize // bytesize, longlongformat, bytesize)
            else:
                # first and last 64k of the file
                value += _sum_words(f, 65536 // bytesize, longlongformat, bytesize)
                f.seek(max(0, filesize-65536), 0)
                value += _sum_words(f, 65536 // bytesize, longlongformat, bytesize)
        # to remain as 64bit number
        value &= 0xFFFFFFFFFFFFFFFF
        return "%016x" % value
    except IOError:
        return "IOError"


def avinfo(filename, cached=True):
    """Return a dictionary describing the media file *filename*.

    With ``cached=True`` the value is looked up through cache(). Empty files
    yield ``{'path': filename, 'size': 0}``. ffprobe is used when it can be
    found, otherwise ffmpeg2theora (0.27 or later) is used.

    Raises EnvironmentError when ffmpeg2theora is older than 0.27.
    """
    if cached:
        return cache(filename, 'info')
    if not os.path.getsize(filename):
        return {'path': filename, 'size': 0}

    # check the same command ffprobe() runs, so that a copy in ~/.ox/bin counts
    if find_executable(cmd('ffprobe')):
        return ffprobe(filename)

    ffmpeg2theora = cmd('ffmpeg2theora')
    p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    stdout = stdout.decode('utf-8')
    version = stdout.split('\n')[0].split(' - ')[0].split(' ')[-1]
    # NOTE(review): this compares version strings lexicographically
    if version < '0.27':
        raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)

    p = subprocess.Popen([ffmpeg2theora, '--info', filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    try:
        info = json.loads(stdout)
    except (TypeError, ValueError):
        # remove metadata, can be broken
        stdout = stdout.decode('utf-8')
        reg = re.compile('"metadata": {.*?},', re.DOTALL)
        stdout = re.sub(reg, '', stdout)
        info = json.loads(stdout)

    for v in info.get('video', []):
        if 'display_aspect_ratio' not in v and 'width' in v:
            v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
            v['pixel_aspect_ratio'] = '1:1'

    if len(info.get('audio', [])) > 1:
        if 'metadata' in info['audio'][0]:
            for stream in info['audio']:
                language = stream.get('metadata', {}).get('language')
                if language and language != 'und':
                    # store the whole tag value, not just its first character
                    stream['language'] = language
        else:
            # no metadata in the json output, parse languages from ffmpeg's stderr
            ffmpeg = cmd('ffmpeg')
            p = subprocess.Popen([ffmpeg, '-i', filename],
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            _, stderr = p.communicate()
            stderr = stderr.decode('utf-8')
            languages = [
                _STREAM_LANGUAGE_RE.findall(line)
                for line in stderr.split('\n')
                if 'Stream' in line and 'Audio' in line
            ]
            if len(languages) == len(info['audio']):
                for stream, language in zip(info['audio'], languages):
                    if language and language[0] != 'und':
                        stream['language'] = language[0]
    return info


def _fix_ffprobe_value(key, value):
    """Convert the ffprobe value for *key* to the type used in ffprobe()'s result."""
    if key == 'r_frame_rate':
        value = value.replace('/', ':')
    elif key == 'bit_rate':
        value = float(value) / 1000
    elif key == 'duration':
        value = float(value)
    elif key == 'size':
        value = int(value)
    return value


def ffprobe(filename):
    """Run ffprobe on *filename* and return the parsed information.

    The result always has ``oshash``, ``path`` and ``size``. When ffprobe's
    output has no ``format`` entry, ``error`` is set to ``'badfile'``;
    otherwise ``audio``, ``video`` and ``metadata`` are filled in, plus
    ``duration``, ``bitrate`` and ``chapters`` when ffprobe reports them.
    """
    p = subprocess.Popen([
        cmd('ffprobe'),
        '-show_format',
        '-show_chapters',
        '-show_streams',
        '-print_format',
        'json',
        '-i', filename
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    ffinfo = json.loads(stdout)

    info = {}
    if 'format' not in ffinfo:
        info['error'] = 'badfile'
    else:
        fmt = ffinfo['format']
        for key in ('duration', 'size', 'bit_rate'):
            if key in fmt:
                info[_FFPROBE_KEY_NAMES.get(key, key)] = _fix_ffprobe_value(key, fmt[key])
        info['audio'] = []
        info['video'] = []
        info['metadata'] = fmt.get('tags', {})

        chapters = ffinfo.get('chapters', [])
        if chapters:
            # only chapters that have a title are kept
            info['chapters'] = [
                {
                    'in': float(chapter['start_time']),
                    'out': float(chapter['end_time']),
                    'value': chapter.get('tags', {}).get('title')
                }
                for chapter in chapters
                if chapter.get('tags', {}).get('title')
            ]

        for s in ffinfo.get('streams', []):
            tags = s.pop('tags', {})
            language = tags.get('language')
            # every other stream tag is merged into the file level metadata
            for t in tags:
                if t != 'language':
                    info['metadata'][t] = tags[t]
            codec_type = s.get('codec_type')
            if codec_type not in ('audio', 'video'):
                continue
            stream = {}
            if language and language != 'und':
                stream['language'] = language
            keys = list(_FFPROBE_STREAM_KEYS)
            if codec_type == 'video':
                keys += _FFPROBE_VIDEO_KEYS
            for key in keys:
                if key in s:
                    stream[_FFPROBE_KEY_NAMES.get(key, key)] = _fix_ffprobe_value(key, s[key])
            info[codec_type].append(stream)

        for v in info['video']:
            k = 'display_aspect_ratio'
            # fall back to square pixels when the ratio is missing or '0:1';
            # this needs both dimensions
            if (k not in v or v[k] == '0:1') and 'width' in v and 'height' in v:
                v[k] = '%d:%d' % (v['width'], v['height'])
                v['pixel_aspect_ratio'] = '1:1'

    info['oshash'] = oshash(filename)
    info['path'] = filename
    if 'size' not in info:
        info['size'] = os.path.getsize(filename)
    return info


def makedirs(path):
    """Create *path* including parents; an already existing path is not an error."""
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            # the path may have been created between the check and makedirs
            if e.errno != errno.EEXIST:
                raise


def copy_file(source, target, verbose=False):
    """Copy *source* to *target*, creating the target's folder if needed."""
    if verbose:
        print('copying', source, 'to', target)
    write_path(target)
    shutil.copyfile(source, target)


def read_file(file, verbose=False):
    """Return the contents of *file* as bytes."""
    if verbose:
        print('reading', file)
    with open(file, 'rb') as f:
        return f.read()


def read_json(file, verbose=False):
    """Return the parsed JSON contents of *file*."""
    if verbose:
        print('reading', file)
    with open(file) as fd:
        return json.load(fd)


def write_file(file, data, verbose=False):
    """Write *data* to *file* and return the number of bytes written.

    Data that is not bytes is encoded as UTF-8 first. The folder of *file*
    is created if needed.
    """
    if verbose:
        print('writing', file)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    write_path(file)
    with open(file, 'wb') as f:
        f.write(data)
    return len(data)


def write_image(file, image, verbose=False):
    """Save *image* to *file* via ``image.save``, creating the folder if needed."""
    if verbose:
        print('writing', file)
    write_path(file)
    image.save(file)


def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):
    """Serialize *data* as JSON and write it to *file*.

    *ensure_ascii*, *indent* and *sort_keys* are passed to ``json.dumps``.
    """
    data = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)
    write_file(file, data if ensure_ascii else data.encode('utf-8'), verbose=verbose)


def write_link(source, target, verbose=False):
    """Create a symlink at *target* pointing to *source*, replacing an existing *target*."""
    if verbose:
        print('linking', source, 'to', target)
    write_path(target)
    # lexists is also true for dangling symlinks, which exists() reports as missing
    if os.path.lexists(target):
        os.unlink(target)
    os.symlink(source, target)


def write_path(file):
    """Create the folder that contains *file* if it does not exist yet."""
    path = os.path.split(file)[0]
    if path:
        makedirs(path)