2008-04-27 16:54:37 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2008-06-19 09:21:21 +00:00
|
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
2008-07-06 13:00:06 +00:00
|
|
|
# GPL 2008
|
2009-05-28 17:00:30 +00:00
|
|
|
from __future__ import division
|
2008-04-27 16:54:37 +00:00
|
|
|
import os
|
2009-03-16 17:15:14 +00:00
|
|
|
import hashlib
|
2010-11-15 16:35:07 +00:00
|
|
|
import re
|
2009-05-28 17:00:30 +00:00
|
|
|
import sys
|
2009-06-14 19:22:47 +00:00
|
|
|
import struct
|
2009-08-07 11:35:28 +00:00
|
|
|
import subprocess
|
|
|
|
|
2010-07-28 13:08:06 +00:00
|
|
|
from ox.utils import json
|
2009-05-28 17:00:30 +00:00
|
|
|
|
2011-04-06 13:24:32 +00:00
|
|
|
__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
|
2008-04-27 16:54:37 +00:00
|
|
|
|
|
|
|
def sha1sum(filename):
    '''
    Return the SHA-1 hex digest of the file at filename.

    Reads in 4096 byte chunks so arbitrarily large files can be hashed
    without loading them into memory.
    '''
    sha1 = hashlib.sha1()
    # binary mode: hashing must see the raw bytes; the original opened
    # the file in text mode, which corrupts the digest on platforms
    # that translate newlines. 'with' also guarantees the handle is
    # closed even if read() raises.
    with open(filename, 'rb') as f:
        data = f.read(4096)
        while data:
            sha1.update(data)
            data = f.read(4096)
    return sha1.hexdigest()
|
2008-04-27 16:54:37 +00:00
|
|
|
|
2009-05-28 17:00:30 +00:00
|
|
|
def oshash(filename):
    '''
    os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes

    hash = filesize + the first and last 64k of the file summed as
    native 64bit integers, truncated to 64 bit after each addition.
    Modification for files < 64k: only the complete 8-byte words that
    fit in the file are summed (trailing bytes are ignored).

    Returns the hash as a 16 character hex string, or the string
    "IOError" if the file could not be read (kept for backwards
    compatibility with existing callers).
    '''
    try:
        longlongformat = 'q'  # native signed 64bit integer
        bytesize = struct.calcsize(longlongformat)
        # 'with' guarantees the handle is closed even if struct.unpack
        # raises; the original only closed it on the success path
        with open(filename, "rb") as f:
            filesize = os.path.getsize(filename)
            file_hash = filesize

            def _sum_words(h, count):
                # add `count` consecutive 64bit words from f to h,
                # truncating to a 64bit number after every addition
                for _ in range(count):
                    (value,) = struct.unpack(longlongformat, f.read(bytesize))
                    h = (h + value) & 0xFFFFFFFFFFFFFFFF
                return h

            if filesize < 65536:
                # small file: sum whatever complete 8-byte words exist
                file_hash = _sum_words(file_hash, int(filesize / bytesize))
            else:
                # first 64k
                file_hash = _sum_words(file_hash, int(65536 / bytesize))
                # last 64k (overlaps the head for files < 128k)
                f.seek(max(0, filesize - 65536), 0)
                file_hash = _sum_words(file_hash, int(65536 / bytesize))
        return "%016x" % file_hash
    except IOError:
        return "IOError"
|
2008-04-27 16:54:37 +00:00
|
|
|
|
2009-08-07 11:35:28 +00:00
|
|
|
def avinfo(filename):
    '''
    Return a dict of audio/video metadata for filename, as parsed from
    the JSON emitted by the ffmpeg2theora command line tool.

    For empty files no external tool is run and a minimal
    {'path': filename, 'size': 0} dict is returned.
    '''
    if os.path.getsize(filename):
        p = subprocess.Popen(['ffmpeg2theora', '--info', filename],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        info, error = p.communicate()
        try:
            info = json.loads(info)
        except Exception:
            # the embedded metadata block may contain broken/unescaped
            # data; strip it and retry. Narrowed from a bare 'except:',
            # which also swallowed KeyboardInterrupt/SystemExit.
            reg = re.compile('"metadata": {.*?},', re.DOTALL)
            info = re.sub(reg, '', info)
            info = json.loads(info)
        if 'video' in info:
            for v in info['video']:
                # some containers omit the aspect ratio; derive a
                # square-pixel default from the frame size
                # NOTE(review): assumes 'height' is present whenever
                # 'width' is — appears to hold for ffmpeg2theora output
                if not 'display_aspect_ratio' in v and 'width' in v:
                    v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
                    v['pixel_aspect_ratio'] = '1:1'
        return info
    return {'path': filename, 'size': 0}
|
2011-04-06 13:24:32 +00:00
|
|
|
|
|
|
|
def makedirs(path):
    '''
    Create path (and any missing parents) like os.makedirs, but do not
    fail if path already exists.
    '''
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        # 'as' form replaces the python2-only 'except OSError, e'
        # syntax; it works on python 2.6+ and python 3
        except OSError as e:
            # 17 == errno.EEXIST: another process created the directory
            # between the exists() check and makedirs()
            if e.errno != 17:
                raise
|
2011-10-11 14:14:29 +00:00
|
|
|
|
|
|
|
def copy_file(source, target, verbose=False):
    '''
    Copy source to target via read_file/write_file (write_file creates
    target's directory if needed).
    '''
    if verbose:
        # single-string form prints identically under the python2
        # print statement and the python3 print function
        print('copying %s to %s' % (source, target))
    write_file(target, read_file(source))
|
|
|
|
|
|
|
|
def read_file(file, verbose=False):
    '''
    Return the entire contents of file.
    '''
    if verbose:
        # single-string form prints identically on python 2 and 3
        print('reading %s' % file)
    # 'with' closes the handle even if read() raises; the original
    # leaked it in that case
    with open(file) as f:
        data = f.read()
    return data
|
|
|
|
|
|
|
|
def read_json(file, verbose=False):
    '''
    Read file and parse its contents as JSON.
    '''
    data = read_file(file, verbose=verbose)
    return json.loads(data)
|
|
|
|
|
|
|
|
def write_file(file, data, verbose=False):
    '''
    Write data to file, creating the containing directory first if
    needed. Returns len(data).
    '''
    if verbose:
        # single-string form prints identically on python 2 and 3
        print('writing %s' % file)
    write_path(file)
    # 'with' closes (and flushes) the handle even if write() raises
    with open(file, 'w') as f:
        f.write(data)
    return len(data)
|
|
|
|
|
|
|
|
def write_json(file, data, indent=0, sort_keys=False, verbose=False):
    '''
    Serialize data as JSON and write it to file.
    '''
    serialized = json.dumps(data, indent=indent, sort_keys=sort_keys)
    write_file(file, serialized, verbose=verbose)
|
|
|
|
|
|
|
|
def write_link(source, target, verbose=False):
    '''
    Create (or replace) a symlink at target pointing to source,
    creating target's directory if needed.
    '''
    if verbose:
        # single-string form prints identically on python 2 and 3
        print('linking %s to %s' % (source, target))
    write_path(target)
    # lexists() (unlike exists()) is also True for a *broken* symlink;
    # with exists() a stale dangling link was not removed and the
    # following os.symlink() failed with EEXIST
    if os.path.lexists(target):
        os.unlink(target)
    os.symlink(source, target)
|
|
|
|
|
|
|
|
def write_path(file):
    '''
    Ensure that the directory containing file exists.
    '''
    path = os.path.dirname(file)
    if path and not os.path.exists(path):
        os.makedirs(path)
|