python-ox/ox/file.py

415 lines
14 KiB
Python
Raw Normal View History

2008-04-27 16:54:37 +00:00
# -*- coding: utf-8 -*-
2008-06-19 09:21:21 +00:00
# vi:si:et:sw=4:sts=4:ts=4
2008-07-06 13:00:06 +00:00
# GPL 2008
from __future__ import division, print_function

import errno
import hashlib
import os
import re
import shutil
import sqlite3
import struct
import subprocess
from distutils.spawn import find_executable
from glob import glob

from .utils import json
2009-05-28 17:00:30 +00:00
2017-01-07 11:11:05 +00:00
__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs', 'iexists']
2008-04-27 16:54:37 +00:00
# Known filename extensions per media type; used e.g. by fix_coverart()
# to distinguish embedded cover art (audio file + image codec) from
# real video streams.
EXTENSIONS = {
    'audio': [
        'aac', 'aif', 'aiff', 'amr',
        'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma', 'opus'
    ],
    'image': [
        'bmp', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp'
    ],
    'subtitle': [
        'idx', 'srt', 'sub', 'vtt'
    ],
    'video': [
        '3gp',
        'avi', 'divx', 'dv', 'flv', 'm2t', 'm2ts', 'm4v', 'mkv', 'mov', 'mp4',
        'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'rm', 'rmvb', 'vob', 'webm', 'wmv', 'asf',
        'mod', 'tod',  # http://en.wikipedia.org/wiki/MOD_and_TOD
        'mxf', 'ts'
    ],
}
2012-06-11 10:11:06 +00:00
def cmd(program):
    """Return the path to program, preferring a user-local override.

    If ~/.ox/bin/<program> exists, that path is returned; otherwise the
    bare name is passed through unchanged, to be resolved via $PATH when
    it is eventually executed.
    """
    override = os.path.expanduser('~/.ox/bin/%s' % program)
    return override if os.path.exists(override) else program
def _get_file_cache():
    """Return the path of the sqlite database used to cache file hashes/info."""
    import ox.cache
    base = ox.cache.cache_path()
    # cache_path() may carry an 'fs:' scheme prefix; strip it
    if base.startswith('fs:'):
        base = base[len('fs:'):]
    return os.path.join(base, 'files.sqlite')
2015-05-23 19:44:37 +00:00
def cache(filename, type='oshash', update=False):
    """Return a cached value for filename from the sqlite file cache.

    type   -- which column to return: 'oshash', 'sha1' or 'info'
              (info is stored as JSON text and decoded on the way out)
    update -- if True, ignore any cached value and recompute it

    Rows are keyed on path and only trusted while the file's size and
    mtime still match; otherwise the requested value is recomputed and
    written back with INSERT OR REPLACE.

    NOTE(review): a type outside the three known values would leave
    `value` unbound and raise UnboundLocalError; callers in this module
    only ever pass the three listed above.
    """
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    conn.row_factory = sqlite3.Row
    # One-time schema setup, flagged on the function object itself
    # (cache.init is set to None right after this def at import time).
    if not cache.init:
        c = conn.cursor()
        c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')
        c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')
        c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')
        conn.commit()
        cache.init = True
    c = conn.cursor()
    c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))
    stat = os.stat(filename)
    row = None
    h = None
    sha1 = None
    info = ''
    for row in c:
        # a row is only valid while size and mtime match the file on disk
        if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):
            if not update:
                value = row[type]
                if value:
                    if type == 'info':
                        value = json.loads(value)
                    return value
            # keep the other cached columns so the REPLACE below does not
            # wipe them when only one value is recomputed
            h = row['oshash']
            sha1 = row['sha1']
            info = row['info']
    if type == 'oshash':
        value = h = oshash(filename, cached=False)
    elif type == 'sha1':
        value = sha1 = sha1sum(filename, cached=False)
    elif type == 'info':
        value = avinfo(filename, cached=False)
        info = json.dumps(value)
    t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)
    with conn:
        sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'
        c.execute(sql, t)
    return value

cache.init = None
def cleanup_cache():
    """Drop cache rows whose files no longer exist, then compact the db."""
    conn = sqlite3.connect(_get_file_cache(), timeout=10)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute('SELECT path FROM cache')
    known_paths = [row[0] for row in cursor]
    stale = [p for p in known_paths if not os.path.exists(p)]
    for p in stale:
        cursor.execute('DELETE FROM cache WHERE path = ?', (p, ))
    conn.commit()
    # reclaim the space freed by the deletes
    cursor.execute('VACUUM')
    conn.commit()
def sha1sum(filename, cached=False):
    """Return the hex SHA-1 digest of the file at filename.

    cached -- if True, serve/populate the sqlite file cache instead of
              always hashing from disk.
    """
    if cached:
        return cache(filename, 'sha1')
    digest = hashlib.sha1()
    chunk_size = 128 * digest.block_size
    with open(filename, 'rb') as fobj:
        while True:
            chunk = fobj.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
2008-04-27 16:54:37 +00:00
2009-05-28 17:00:30 +00:00
'''
os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
plus modification for files < 64k, buffer is filled with file data and padded with 0
'''
def oshash(filename, cached=True):
    """Return the 64-bit OpenSubtitles hash of filename as 16 hex digits.

    The hash is the file size plus the 64-bit little/native-endian words of
    the first and last 64 KiB, truncated to 64 bits.  For files smaller
    than 64 KiB only the full 8-byte words of the file are summed
    (NOTE(review): trailing bytes beyond a multiple of 8 are ignored, so
    the "padded with 0" note above does not match the code - preserved
    as-is for hash stability).

    cached -- if True, serve/populate the sqlite file cache.

    Returns the string "IOError" if the file cannot be read (legacy
    behavior, kept for compatibility with existing callers).
    """
    if cached:
        return cache(filename, 'oshash')
    try:
        longlongformat = 'q'  # native signed long long
        bytesize = struct.calcsize(longlongformat)

        filesize = os.path.getsize(filename)
        hashvalue = filesize
        # with-statement ensures the handle is closed even if unpack fails
        # (the original leaked the handle on any exception)
        with open(filename, "rb") as f:
            if filesize < 65536:
                for _ in range(int(filesize / bytesize)):
                    (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                    hashvalue += l_value
                    hashvalue = hashvalue & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
            else:
                for _ in range(int(65536 / bytesize)):
                    (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                    hashvalue += l_value
                    hashvalue = hashvalue & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
                f.seek(max(0, filesize - 65536), 0)
                for _ in range(int(65536 / bytesize)):
                    (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                    hashvalue += l_value
                    hashvalue = hashvalue & 0xFFFFFFFFFFFFFFFF
        return "%016x" % hashvalue
    except IOError:
        return "IOError"
2008-04-27 16:54:37 +00:00
def avinfo(filename, cached=True):
    """Return a dict of audio/video metadata for filename.

    cached -- if True, serve/populate the sqlite file cache (see cache()).

    Uses ffprobe when available; otherwise falls back to
    `ffmpeg2theora --info` (version 0.27 or later required).  Empty
    files short-circuit to {'path': filename, 'size': 0}.

    Raises EnvironmentError if only an outdated ffmpeg2theora is found.
    """
    if cached:
        return cache(filename, 'info')
    if os.path.getsize(filename):
        # shutil.which replaces distutils.spawn.find_executable
        # (distutils was removed in Python 3.12)
        if shutil.which('ffprobe'):
            return ffprobe(filename)
        ffmpeg2theora = cmd('ffmpeg2theora')
        p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, error = p.communicate()
        stdout = stdout.decode('utf-8')
        version = stdout.split('\n')[0].split(' - ')[0].split(' ')[-1]
        if version < '0.27':
            raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)
        p = subprocess.Popen([ffmpeg2theora, '--info', filename],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, error = p.communicate()
        stdout = stdout.decode('utf-8')
        try:
            info = json.loads(stdout)
        except ValueError:
            # ffmpeg2theora sometimes emits broken "metadata" blocks;
            # strip them and retry.  json decode errors are ValueError
            # subclasses, so this no longer swallows unrelated exceptions.
            reg = re.compile('"metadata": {.*?},', re.DOTALL)
            stdout = re.sub(reg, '', stdout)
            info = json.loads(stdout)
        if 'video' in info:
            for v in info['video']:
                if 'display_aspect_ratio' not in v and 'width' in v:
                    v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
                    v['pixel_aspect_ratio'] = '1:1'
        if len(info.get('audio', [])) > 1:
            if 'metadata' in info['audio'][0]:
                for stream in info['audio']:
                    language = stream.get('metadata', {}).get('language')
                    if language and language != 'und':
                        stream['language'] = language[0]
            else:
                # no per-stream metadata: scrape languages from ffmpeg stderr
                ffmpeg = cmd('ffmpeg')
                p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8')
                languages = [re.compile(r'\((.+?)\):').findall(l) for l in stderr.split('\n') if 'Stream' in l and 'Audio' in l]
                if len(languages) == len(info['audio']):
                    for i, stream in enumerate(info['audio']):
                        language = languages[i]
                        if language and language[0] != 'und':
                            stream['language'] = language[0]
        fix_coverart(info)
        return info
    return {'path': filename, 'size': 0}
2011-04-06 13:24:32 +00:00
2012-06-11 10:11:06 +00:00
def ffprobe(filename):
    """Probe filename with ffprobe and normalize its JSON output.

    Returns a dict with 'duration'/'size'/'bitrate' (where present),
    'audio' and 'video' stream lists, 'metadata', optional 'chapters'
    and 'subtitles', plus 'oshash' and 'path'.  If ffprobe cannot parse
    the file at all, info['error'] is set to 'badfile'.
    """
    p = subprocess.Popen([
        cmd('ffprobe'),
        '-show_format',
        '-show_chapters',
        '-show_streams',
        '-print_format',
        'json',
        '-i', filename
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info, error = p.communicate()
    info = info.decode('utf-8')
    ffinfo = json.loads(info)

    def fix_value(key, value):
        # Normalize ffprobe's string values: "num/den" framerates become
        # "num:den", bit_rate becomes kbit/s, duration a float, size an int.
        if key == 'r_frame_rate':
            value = value.replace('/', ':')
        elif key == 'bit_rate':
            value = float(value) / 1000
        elif key == 'duration':
            value = float(value)
        elif key == 'size':
            value = int(value)
        return value

    info = {}
    if 'format' not in ffinfo:
        # ffprobe produced no format section: unreadable/unsupported file
        info['error'] = 'badfile'
    else:
        for key in ('duration', 'size', 'bit_rate'):
            if key in ffinfo['format']:
                info[{
                    'bit_rate': 'bitrate'
                }.get(key, key)] = fix_value(key, ffinfo['format'][key])
        info['audio'] = []
        info['video'] = []
        info['metadata'] = ffinfo['format'].get('tags', {})
        chapters = ffinfo.get('chapters', [])
        if chapters:
            # only chapters that carry a title tag are kept
            info['chapters'] = [
                {
                    'in': float(chapter['start_time']),
                    'out': float(chapter['end_time']),
                    'value': chapter.get('tags', {}).get('title')
                }
                for chapter in chapters if chapter.get('tags', {}).get('title')
            ]
        for s in ffinfo['streams']:
            tags = s.pop('tags', {})
            language = None
            for t in tags:
                if t == 'language':
                    language = tags[t]
                else:
                    # non-language per-stream tags get merged into file metadata
                    info['metadata'][t] = tags[t]
            if s.get('codec_type') in ('audio', 'video'):
                stream = {}
                if language and language != 'und':
                    stream['language'] = language
                keys = [
                    'codec_name',
                    'width',
                    'height',
                    'bit_rate',
                    'index',
                    'display_aspect_ratio',
                    'sample_rate',
                    'channels',
                ]
                if s['codec_type'] == 'video':
                    keys += [
                        'sample_aspect_ratio',
                        'r_frame_rate',
                        'pix_fmt',
                    ]
                # copy the selected keys, renaming to this module's conventions
                for key in keys:
                    if key in s:
                        stream[{
                            'codec_name': 'codec',
                            'bit_rate': 'bitrate',
                            'index': 'id',
                            'r_frame_rate': 'framerate',
                            'sample_rate': 'samplerate',
                            'pix_fmt': 'pixel_format',
                            'sample_aspect_ratio': 'pixel_aspect_ratio',
                        }.get(key, key)] = fix_value(key, s[key])
                info[s['codec_type']].append(stream)
            elif s.get('codec_type') == 'subtitle':
                info['subtitles'] = info.get('subtitles', [])
                stream = {}
                if language and language != 'und':
                    stream['language'] = language
                for key in (
                    'codec_name',
                    'language',
                    'width',
                    'height',
                ):
                    if key in s:
                        stream[{
                            'codec_name': 'codec',
                        }.get(key, key)] = s[key]
                info['subtitles'].append(stream)
            else:
                # data/attachment streams are ignored
                pass
                # print s
        for v in info['video']:
            # a rotate tag of +/-90 or 180 means width/height are swapped
            if 'rotate' in info.get('metadata', {}) and int(info['metadata']['rotate']) in (-180, -90, 90, 180):
                v['width'], v['height'] = v['height'], v['width']
            k = 'display_aspect_ratio'
            if k not in v and 'width' in v \
                    or (k in v and v[k] == '0:1'):
                v[k] = '%d:%d' % (v['width'], v['height'])
                v['pixel_aspect_ratio'] = '1:1'
    info['oshash'] = oshash(filename)
    info['path'] = filename
    if 'size' not in info:
        info['size'] = os.path.getsize(filename)
    fix_coverart(info)
    return info
def fix_coverart(info):
    """Move embedded cover art out of the video stream list.

    Audio files sometimes expose their cover image as a video stream;
    when the path has an audio extension and the first video stream
    uses an image codec (or mjpeg), relocate the streams to
    info['cover'] and leave info['video'] empty.  Returns info.
    """
    streams = info.get('video')
    if streams:
        extension = info['path'].split('.')[-1]
        codec = streams[0]['codec']
        if extension in EXTENSIONS['audio'] and codec in EXTENSIONS['image'] + ['mjpeg']:
            info['cover'] = info.pop('video')
            info['video'] = []
    return info
2011-04-06 13:24:32 +00:00
def makedirs(path):
    """Create path (and any missing parents), ignoring if it already exists.

    The try/except tolerates another process creating the directory
    between the exists() check and makedirs().
    """
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            # errno.EEXIST (was a magic 17): directory appeared concurrently
            if e.errno != errno.EEXIST:
                raise
def copy_file(source, target, verbose=False):
    """Copy source to target, creating target's directory first.

    verbose -- if True, log the paths involved.
    """
    if verbose:
        print('copying', source, 'to', target)
    write_path(target)
    shutil.copyfile(source, target)
def read_file(file, verbose=False):
    """Return the contents of file as bytes.

    verbose -- if True, log the path being read.
    """
    if verbose:
        print('reading', file)
    # with-statement closes the handle even if read() raises
    # (the original open/read/close leaked it on error)
    with open(file, 'rb') as f:
        return f.read()
def read_json(file, verbose=False):
    """Parse file as JSON and return the decoded object.

    verbose -- if True, log the path being read.
    """
    if verbose:
        print('reading', file)
    with open(file) as fd:
        return json.load(fd)
def write_file(file, data, verbose=False):
    """Write data to file, creating parent directories as needed.

    data    -- bytes, or str (encoded as UTF-8 before writing)
    verbose -- if True, log the path being written

    Returns the number of bytes written.
    """
    if verbose:
        print('writing', file)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    write_path(file)
    # with-statement closes the handle even if write() raises
    # (the original open/write/close leaked it on error)
    with open(file, 'wb') as f:
        f.write(data)
    return len(data)
def write_image(file, image, verbose=False):
    """Save an image object (anything with .save(path)) to file,
    creating parent directories as needed.

    verbose -- if True, log the path being written.
    """
    if verbose:
        print('writing', file)
    write_path(file)
    image.save(file)
def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose=False):
    """Serialize data as JSON and write it to file via write_file()."""
    serialized = json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, sort_keys=sort_keys)
    if not ensure_ascii:
        # non-ascii output is pre-encoded so write_file stores it as UTF-8
        serialized = serialized.encode('utf-8')
    write_file(file, serialized, verbose=verbose)
def write_link(source, target, verbose=False):
    """Create a symlink at target pointing to source.

    Any existing file or link at target is removed first; parent
    directories are created as needed.
    """
    if verbose:
        print('linking', source, 'to', target)
    write_path(target)
    if os.path.exists(target):
        os.unlink(target)
    os.symlink(source, target)
def write_path(file):
    """Ensure that the directory portion of file exists on disk."""
    folder = os.path.split(file)[0]
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
2017-01-07 11:11:05 +00:00
def iexists(path):
    """Case-insensitive existence check for the final component of path.

    Lists the containing directory and compares lowercased names;
    returns False if the directory itself does not exist.
    """
    parts = path.split(os.sep)
    target = parts[-1].lower()
    folder = '.' if len(parts) == 1 else os.path.dirname(path)
    try:
        entries = os.listdir(folder)
    except FileNotFoundError:
        return False
    lowered = {os.path.basename(entry).lower() for entry in entries}
    return target in lowered