# -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 # GPL 2008 from __future__ import division, print_function from glob import glob import hashlib import os import re import shutil import sqlite3 import struct import subprocess from .utils import json __all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs', 'iexists'] EXTENSIONS = { 'audio': [ 'aac', 'aif', 'aiff', 'amr', 'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma', 'opus', 'ra', # Real Audio ], 'image': [ 'bmp', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp' ], 'subtitle': [ 'idx', 'srt', 'sub', 'vtt' ], 'video': [ '3gp', 'avi', 'divx', 'dv', 'flv', 'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'vob', 'webm', 'wmv', 'asf', 'mod', 'tod', # http://en.wikipedia.org/wiki/MOD_and_TOD 'mxf', 'ts', 'dat', # VOD files 'rm', 'rmvb', # Real Media ], } def is_exe(fpath): return os.path.isfile(fpath) and os.access(fpath, os.X_OK) def which(program): local = os.path.expanduser('~/.ox/bin/%s' % program) if os.path.exists(local): return local fpath, fname = os.path.split(program) if fpath: if is_exe(program): return program else: for path in os.environ.get("PATH", "").split(os.pathsep): exe_file = os.path.join(path, program) if is_exe(exe_file): return exe_file return None def cmd(program): local = os.path.expanduser('~/.ox/bin/%s' % program) if os.path.exists(local): program = local return program def _get_file_cache(): import ox.cache path = ox.cache.cache_path() if path.startswith('fs:'): path = path[3:] return os.path.join(path, 'files.sqlite') def cache(filename, type='oshash', update=False): conn = sqlite3.connect(_get_file_cache(), timeout=10) conn.row_factory = sqlite3.Row if not cache.init: c = conn.cursor() c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)') c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)') c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)') conn.commit() cache.init = True c = conn.cursor() c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, )) stat = os.stat(filename) row = None h = None sha1 = None info = '' for row in c: if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']): if not update: value = row[type] if value: if type == 'info': value = json.loads(value) return value h = row['oshash'] sha1 = row['sha1'] info = row['info'] if type == 'oshash': value = h = oshash(filename, cached=False) elif type == 'sha1': value = sha1 = sha1sum(filename, cached=False) elif type == 'info': value = avinfo(filename, cached=False) info = json.dumps(value) t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info) with conn: sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)' c.execute(sql, t) return value cache.init = None def cleanup_cache(): conn = sqlite3.connect(_get_file_cache(), timeout=10) conn.row_factory = sqlite3.Row c = conn.cursor() c.execute('SELECT path FROM cache') paths = [r[0] for r in c] for path in paths: if not os.path.exists(path): c.execute('DELETE FROM cache WHERE path = ?', (path, )) conn.commit() c.execute('VACUUM') conn.commit() def sha1sum(filename, cached=False): if cached: return cache(filename, 'sha1') sha1 = hashlib.sha1() with open(filename, 'rb') as f: for chunk in iter(lambda: f.read(128*sha1.block_size), b''): sha1.update(chunk) return sha1.hexdigest() ''' os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes plus modification for files < 64k, buffer is filled with file data and padded with 0 ''' def oshash(filename, cached=True): if cached: return cache(filename, 'oshash') try: longlongformat = 'q' # long long bytesize = struct.calcsize(longlongformat) f = open(filename, "rb") filesize = os.path.getsize(filename) hash = filesize if filesize < 65536: for x in range(int(filesize/bytesize)): buffer = f.read(bytesize) (l_value,) = struct.unpack(longlongformat, buffer) hash += l_value hash = hash & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number else: for x in range(int(65536/bytesize)): buffer = f.read(bytesize) (l_value,) = struct.unpack(longlongformat, buffer) hash += l_value hash = hash & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number f.seek(max(0, filesize-65536), 0) for x in range(int(65536/bytesize)): buffer = f.read(bytesize) (l_value,) = struct.unpack(longlongformat, buffer) hash += l_value hash = hash & 0xFFFFFFFFFFFFFFFF f.close() returnedhash = "%016x" % hash return returnedhash except IOError: return "IOError" def avinfo(filename, cached=True): if cached: return cache(filename, 'info') if os.path.getsize(filename): if which('ffprobe'): return ffprobe(filename) raise EnvironmentError('could to find ffprobe. please install ffmpeg') return {'path': filename, 'size': 0} def ffprobe(filename): p = subprocess.Popen([ cmd('ffprobe'), '-show_format', '-show_chapters', '-show_streams', '-print_format', 'json', '-i', filename ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) info, error = p.communicate() info = info.decode('utf-8') ffinfo = json.loads(info) def fix_value(key, value): if key == 'r_frame_rate': value = value.replace('/', ':') elif key == 'bit_rate': value = float(value) / 1000 elif key == 'duration': value = float(value) elif key == 'size': value = int(value) return value info = {} if 'format' not in ffinfo: info['error'] = 'badfile' else: for key in ('duration', 'size', 'bit_rate'): if key in ffinfo['format']: info[{ 'bit_rate': 'bitrate' }.get(key, key)] = fix_value(key, ffinfo['format'][key]) info['audio'] = [] info['video'] = [] info['metadata'] = ffinfo['format'].get('tags', {}) chapters = ffinfo.get('chapters', []) if chapters: info['chapters'] = [ { 'in': float(chapter['start_time']), 'out': float(chapter['end_time']), 'value': chapter.get('tags', {}).get('title') } for chapter in chapters if chapter.get('tags', {}).get('title') ] for s in ffinfo['streams']: tags = s.pop('tags', {}) side_data_list = s.pop('side_data_list', []) language = None for t in tags: if t == 'language': language = tags[t] else: info['metadata'][t] = tags[t] for kv in side_data_list: for k, v in kv.items(): info['metadata'][k] = v if s.get('codec_type') in ('audio', 'video'): stream = {} if language and language != 'und': stream['language'] = language keys = [ 'codec_name', 'width', 'height', 'bit_rate', 'index', 'display_aspect_ratio', 'sample_rate', 'channels', ] if s['codec_type'] == 'video': keys += [ 'sample_aspect_ratio', 'r_frame_rate', 'pix_fmt', ] for key in keys: if key in s: stream[{ 'codec_name': 'codec', 'bit_rate': 'bitrate', 'index': 'id', 'r_frame_rate': 'framerate', 'sample_rate': 'samplerate', 'pix_fmt': 'pixel_format', 'sample_aspect_ratio': 'pixel_aspect_ratio', }.get(key, key)] = fix_value(key, s[key]) if 'avg_frame_rate' in s and stream.get('framerate') == "90000:1": stream['framerate'] = fix_value('r_frame_rate', s['avg_frame_rate']) stream['force_framerate'] = True info[s['codec_type']].append(stream) elif s.get('codec_type') == 'subtitle': info['subtitles'] = info.get('subtitles', []) stream = {} if language and language != 'und': stream['language'] = language for key in ( 'codec_name', 'language', 'width', 'height', ): if key in s: stream[{ 'codec_name': 'codec', }.get(key, key)] = s[key] info['subtitles'].append(stream) else: pass # print s for v in info['video']: k = 'display_aspect_ratio' if 'rotate' in info.get('metadata', {}) and int(info['metadata']['rotate']) in (-90, 90): v['width'], v['height'] = v['height'], v['width'] if k in v: v[k] = ':'.join(reversed(v[k].split(':'))) elif 'rotation' in info.get('metadata', {}) and int(info['metadata']['rotation']) in (-90, 90): v['width'], v['height'] = v['height'], v['width'] if k in v: v[k] = ':'.join(reversed(v[k].split(':'))) if k not in v and 'width' in v \ or (k in v and v[k] == '0:1'): v[k] = '%d:%d' % (v['width'], v['height']) v['pixel_aspect_ratio'] = '1:1' info['oshash'] = oshash(filename) info['path'] = filename if 'size' not in info: info['size'] = os.path.getsize(filename) fix_coverart(info) return info def fix_coverart(info): if info.get('video') \ and info['path'].split('.')[-1] in EXTENSIONS['audio'] \ and info['video'][0]['codec'] in EXTENSIONS['image'] + ['mjpeg']: info['cover'] = info.pop('video') info['video'] = [] return info def makedirs(path): if not os.path.exists(path): try: os.makedirs(path) except OSError as e: if e.errno != 17: raise def copy_file(source, target, verbose=False): if verbose: print('copying', source, 'to', target) write_path(target) shutil.copyfile(source, target) def read_file(file, verbose=False): if verbose: print('reading', file) f = open(file, 'rb') data = f.read() f.close() return data def read_json(file, verbose=False): if verbose: print('reading', file) with open(file) as fd: data = json.load(fd) return data def write_file(file, data, verbose=False): if verbose: print('writing', file) if not isinstance(data, bytes): data = data.encode('utf-8') write_path(file) f = open(file, 'wb') f.write(data) f.close() return len(data) def write_image(file, image, verbose=False): if verbose: print('writing', file) write_path(file) image.save(file) def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False): data = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys) write_file(file, data if ensure_ascii else data.encode('utf-8'), verbose=verbose) def write_link(source, target, verbose=False): if verbose: print('linking', source, 'to', target) write_path(target) if os.path.exists(target): os.unlink(target) os.symlink(source, target) def write_path(file): path = os.path.split(file)[0] if path and not os.path.exists(path): os.makedirs(path) def iexists(path): parts = path.split(os.sep) name = parts[-1].lower() if len(parts) == 1: folder = '.' else: folder = os.path.dirname(path) try: files = os.listdir(folder) except FileNotFoundError: return False files = {os.path.basename(f).lower() for f in files} return name in files