deal with files < 64k in oshash

This commit is contained in:
j 2009-06-14 21:22:47 +02:00
parent c3c9c49788
commit a2bf2a77d1

View file

@@ -5,7 +5,7 @@ from __future__ import division
import os import os
import hashlib import hashlib
import sys import sys
import struct import struct
def sha1sum(filename): def sha1sum(filename):
@@ -20,37 +20,38 @@ def sha1sum(filename):
''' '''
os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
plus modification for files < 64k, buffer is filled with file data and padded with 0
''' '''
def oshash(filename): def oshash(filename):
try: try:
longlongformat = 'q' # long long longlongformat = 'q' # long long
bytesize = struct.calcsize(longlongformat) bytesize = struct.calcsize(longlongformat)
f = open(filename, "rb")
filesize = os.path.getsize(filename)
hash = filesize
if filesize < 65536:
return "SizeError"
for x in range(int(65536/bytesize)):
buffer = f.read(bytesize)
(l_value,)= struct.unpack(longlongformat, buffer)
hash += l_value
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
f.seek(max(0,filesize-65536),0) f = open(filename, "rb")
for x in range(int(65536/bytesize)):
buffer = f.read(bytesize) filesize = os.path.getsize(filename)
(l_value,)= struct.unpack(longlongformat, buffer) hash = filesize
hash += l_value if filesize < 65536:
hash = hash & 0xFFFFFFFFFFFFFFFF for x in range(int(filesize/bytesize)):
buffer = f.read(bytesize)
f.close() (l_value,)= struct.unpack(longlongformat, buffer)
returnedhash = "%016x" % hash hash += l_value
return returnedhash hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
except(IOError): else:
for x in range(int(65536/bytesize)):
buffer = f.read(bytesize)
(l_value,)= struct.unpack(longlongformat, buffer)
hash += l_value
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
f.seek(max(0,filesize-65536),0)
for x in range(int(65536/bytesize)):
buffer = f.read(bytesize)
(l_value,)= struct.unpack(longlongformat, buffer)
hash += l_value
hash = hash & 0xFFFFFFFFFFFFFFFF
f.close()
returnedhash = "%016x" % hash
return returnedhash
except(IOError):
return "IOError" return "IOError"