add cache for file functions: oshash/sha1sum/avinfo
parent fffd0a17f7
commit 771d76d8bb

1 changed file with 72 additions and 4 deletions

ox/file.py (+72 -4)
@@ -5,21 +5,85 @@ from __future__ import division
 import os
 import hashlib
 import re
-import sys
 import struct
 import subprocess
+import sqlite3
 
 from ox.utils import json
 
 __all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
 
+
 def cmd(program):
     local = os.path.expanduser('~/.ox/bin/%s' % program)
     if os.path.exists(local):
         program = local
     return program
 
-def sha1sum(filename):
+def _get_file_cache():
+    import ox.cache
+    return os.path.join(ox.cache.cache_path(), 'files.sqlite')
+
+def cache(filename, type='oshash'):
+    conn = sqlite3.connect(_get_file_cache(), timeout=10)
+    conn.text_factory = str
+    conn.row_factory = sqlite3.Row
+
+    if not cache.init:
+        c = conn.cursor()
+        c.execute('CREATE TABLE IF NOT EXISTS cache (path varchar(1024) unique, oshash varchar(16), sha1 varchar(42), size int, mtime int, info text)')
+        c.execute('CREATE INDEX IF NOT EXISTS cache_oshash ON cache (oshash)')
+        c.execute('CREATE INDEX IF NOT EXISTS cache_sha1 ON cache (sha1)')
+        conn.commit()
+        cache.init = True
+    c = conn.cursor()
+    c.execute('SELECT oshash, sha1, info, size, mtime FROM cache WHERE path = ?', (filename, ))
+    stat = os.stat(filename)
+    row = None
+    h = None
+    sha1 = None
+    info = ''
+    for row in c:
+        if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):
+            value = row[type]
+            if value:
+                if type == 'info':
+                    value = json.loads(value)
+                return value
+        h = row['oshash']
+        sha1 = row['sha1']
+        info = row['info']
+    if type == 'oshash':
+        value = h = oshash(filename, cached=False)
+    elif type == 'sha1':
+        value = sha1 = sha1sum(filename, cached=False)
+    elif type == 'info':
+        value = avinfo(filename, cached=False)
+        info = json.dumps(value)
+    t = (filename, h, sha1, stat.st_size, int(stat.st_mtime), info)
+    with conn:
+        sql = u'INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?)'
+        c.execute(sql, t)
+    return value
+cache.init = None
+
+def cleanup_cache():
+    conn = sqlite3.connect(_get_file_cache(), timeout=10)
+    conn.text_factory = str
+    conn.row_factory = sqlite3.Row
+    c = conn.cursor()
+    c.execute('SELECT path FROM cache')
+    paths = [r[0] for r in c]
+    for path in paths:
+        if not os.path.exists(path):
+            c.execute('DELETE FROM cache WHERE path = ?', (path, ))
+    conn.commit()
+    c.execute('VACUUM')
+    conn.commit()
+
+def sha1sum(filename, cached=False):
+    if cached:
+        return cache(filename, 'sha1')
     sha1 = hashlib.sha1()
     file=open(filename)
     buffer=file.read(4096)
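The cache() helper added above keys each row by path and invalidates it when size or mtime changes, so repeated lookups of an unchanged file are answered from SQLite instead of re-hashing. A minimal usage sketch, not part of the commit, assuming the module is importable as ox.file and that the example path (hypothetical) exists:

import ox.file

path = '/tmp/example.avi'  # hypothetical test file

# The first call hashes the file and stores path, sha1, size and mtime in the
# files.sqlite database under ox.cache.cache_path(); the second call is served
# from that row as long as size and mtime are unchanged.
print(ox.file.sha1sum(path, cached=True))
print(ox.file.sha1sum(path, cached=True))

# Rows whose files no longer exist can be purged afterwards.
ox.file.cleanup_cache()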
@@ -33,7 +97,9 @@ def sha1sum(filename):
 os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
 plus modification for files < 64k, buffer is filled with file data and padded with 0
 '''
-def oshash(filename):
+def oshash(filename, cached=True):
+    if cached:
+        return cache(filename, 'oshash')
     try:
         longlongformat = 'q'  # long long
         bytesize = struct.calcsize(longlongformat)
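With cached defaulting to True, oshash() now goes through the same cache table, and the stored columns can be inspected directly with sqlite3. A sketch, not part of the commit, reusing the hypothetical path from above:

import sqlite3
from ox.file import _get_file_cache, oshash

h = oshash('/tmp/example.avi', cached=True)  # hypothetical file path

# Show what cache() recorded for that file in files.sqlite.
conn = sqlite3.connect(_get_file_cache())
for path, stored_hash, size, mtime in conn.execute(
        'SELECT path, oshash, size, mtime FROM cache'):
    print(path, stored_hash, size, mtime)
conn.close()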
@@ -66,7 +132,9 @@ def oshash(filename):
     except(IOError):
         return "IOError"
 
-def avinfo(filename):
+def avinfo(filename, cached=True):
+    if cached:
+        return cache(filename, 'info')
     if os.path.getsize(filename):
         ffmpeg2theora = cmd('ffmpeg2theora')
         p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
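avinfo() follows the same pattern: with cached=True the result is stored as JSON text in the info column, and a later call for an unchanged file returns json.loads(info) from the cache. A sketch, not part of the commit, assuming ffmpeg2theora is installed or present in ~/.ox/bin and using the same hypothetical path:

from ox.file import avinfo

info = avinfo('/tmp/example.avi', cached=True)  # hypothetical file path
print(info)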