openmedialibrary_platform/Darwin/lib/python2.7/site-packages/twisted/python/zipstream.py

# -*- test-case-name: twisted.python.test.test_zipstream -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
An incremental approach to unzipping files.  This allows you to unzip a little
bit of a file at a time, which means you can report progress as a file unzips.
"""

import zipfile
import os.path
import zlib
import struct


# Size in bytes of the fixed-length part of a zip local file header, computed
# from the struct format zipfile.structFileHeader.  ChunkingZipFile.readfile
# reads exactly this many bytes before the variable-length file name and
# extra field that follow.
_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)

class ChunkingZipFile(zipfile.ZipFile):
    """
    A C{zipfile.ZipFile} subclass which adds L{readfile}, a way to obtain a
    file-like object for a single archive member so that the member can be
    read a piece at a time.
    """

    def readfile(self, name):
        """
        Open the archive member called C{name} for incremental reading.

        @param name: the name of an entry in this archive, as accepted by
            C{getinfo}.

        @return: a L{ZipFileEntry} for a stored member or a
            L{DeflatedZipFileEntry} for a deflated one.  The archive's file
            pointer is left at the first byte of the member's data.

        @raise RuntimeError: if the archive was not opened in mode C{"r"} or
            C{"a"}, or if it has already been closed.
        @raise zipfile.BadZipfile: if the local file header does not start
            with the expected magic number, if the file name stored in the
            header differs from the one recorded for the member, or if the
            member uses a compression method other than stored or deflated.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        memberInfo = self.getinfo(name)
        archive = self.fp
        archive.seek(memberInfo.header_offset, 0)

        # Fixed-length part of the local header first, then the variable
        # length name and extra field that follow it.
        rawHeader = archive.read(_fileHeaderSize)
        if rawHeader[0:4] != zipfile.stringFileHeader:
            raise zipfile.BadZipfile("Bad magic number for file header")
        headerFields = struct.unpack(zipfile.structFileHeader, rawHeader)

        headerName = archive.read(headerFields[zipfile._FH_FILENAME_LENGTH])
        extraLength = headerFields[zipfile._FH_EXTRA_FIELD_LENGTH]
        if extraLength:
            # The extra field is not needed; reading it just advances the
            # file pointer to the start of the member's data.
            archive.read(extraLength)

        if headerName != memberInfo.orig_filename:
            raise zipfile.BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    memberInfo.orig_filename, headerName))

        method = memberInfo.compress_type
        if method == zipfile.ZIP_STORED:
            return ZipFileEntry(self, memberInfo.compress_size)
        if method == zipfile.ZIP_DEFLATED:
            return DeflatedZipFileEntry(self, memberInfo.compress_size)
        raise zipfile.BadZipfile(
            "Unsupported compression method %d for file %s" %
                (method, name))


class _FileEntry(object):
    """
    Abstract superclass of both compressed and uncompressed variants of
    file-like objects within a zip archive.

    This class does not define C{read}; subclasses must provide a
    C{read(n)} method, which L{readline} (and therefore iteration) relies on.

    @ivar chunkingZipFile: a chunking zip file.
    @type chunkingZipFile: L{ChunkingZipFile}

    @ivar length: The number of bytes within the zip file that represent this
    file.  (This is the size on disk, not the number of decompressed bytes
    which will result from reading it.)

    @ivar fp: the underlying file object (that contains pkzip data).  Do not
    touch this, please.  It will quite likely move or go away.  The attribute
    is deleted when the entry is closed.

    @ivar closed: File-like 'closed' attribute; False before this file has
    been closed, True after.
    @type closed: C{bool}

    @ivar finished: An older, broken synonym for 'closed'.  Do not touch this,
    please.
    @type finished: C{int}
    """
    def __init__(self, chunkingZipFile, length):
        """
        Create a L{_FileEntry} from a L{ChunkingZipFile}.

        @param chunkingZipFile: the archive this entry belongs to; its C{fp}
            attribute is captured as this entry's C{fp}.
        @param length: the number of bytes this entry occupies in the archive.
        """
        self.chunkingZipFile = chunkingZipFile
        self.fp = self.chunkingZipFile.fp
        self.length = length
        self.finished = 0
        self.closed = False


    def isatty(self):
        """
        Returns false because zip files should not be ttys
        """
        return False


    def close(self):
        """
        Close self (file-like object).

        Calling this more than once is harmless: the reference to the
        underlying file is only dropped the first time.
        """
        self.closed = True
        self.finished = 1
        # An unconditional 'del' would raise AttributeError on a second
        # close(); file-like objects are expected to tolerate that.
        if hasattr(self, 'fp'):
            del self.fp


    def readline(self):
        """
        Read a line.

        @return: the bytes up to and including the next newline, or up to the
            end of the entry if no newline remains.  An empty string means
            the entry is exhausted.
        """
        # Byte-string literals compare equal to plain str on Python 2, and
        # keep the end-of-data sentinel working where read() returns bytes.
        pieces = []
        for byte in iter(lambda: self.read(1), b""):
            pieces.append(byte)
            if byte == b"\n":
                break
        return b"".join(pieces)


    def next(self):
        """
        Implement next as file does (like readline, except raises StopIteration
        at EOF)
        """
        nextline = self.readline()
        if nextline:
            return nextline
        raise StopIteration()

    # Newer iterator protocol spelling of next(); same behaviour.
    __next__ = next


    def readlines(self):
        """
        Returns a list of all the lines
        """
        return list(self)


    def xreadlines(self):
        """
        Returns an iterator (so self)
        """
        return self


    def __iter__(self):
        """
        Returns an iterator (so self)
        """
        return self


class ZipFileEntry(_FileEntry):
    """
    File-like object used to read an uncompressed entry in a ZipFile

    @ivar readBytes: the number of bytes of this entry read so far.
    @type readBytes: C{int}
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of bytes this entry occupies in the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.readBytes = 0


    def tell(self):
        """
        Return the number of bytes of this entry that have been read.
        """
        return self.readBytes


    def read(self, n=None):
        """
        Read up to C{n} bytes of this entry from the archive.

        @param n: the maximum number of bytes to return.  C{None} or a
            negative value means "everything that is left in this entry".

        @return: the bytes read; an empty string once the entry has been
            fully read or closed.
        """
        remaining = self.length - self.readBytes
        # A negative size must never reach the archive's read(): there it
        # means "read to end of file", which would run past this entry into
        # whatever follows it in the archive.
        if n is None or n < 0:
            n = remaining
        if n == 0 or self.finished:
            return b''
        data = self.chunkingZipFile.fp.read(min(n, remaining))
        self.readBytes += len(data)
        # A short read means either the entry or the archive itself ran out.
        if self.readBytes == self.length or len(data) < n:
            self.finished = 1
        return data


class DeflatedZipFileEntry(_FileEntry):
    """
    File-like object used to read a deflated entry in a ZipFile

    @ivar returnedBytes: the number of decompressed bytes handed back to the
    caller so far; this is what L{tell} reports.
    @ivar readBytes: the number of compressed bytes consumed from the archive.
    @ivar decomp: the C{zlib} decompression object for this entry.
    @ivar buffer: decompressed bytes not yet returned to the caller.
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of compressed bytes this entry occupies in
            the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.returnedBytes = 0
        self.readBytes = 0
        # Negative window size: the stream has no zlib header or trailer.
        self.decomp = zlib.decompressobj(-15)
        self.buffer = b""


    def tell(self):
        """
        Return the number of decompressed bytes returned so far.
        """
        return self.returnedBytes


    def read(self, n=None):
        """
        Read and decompress data from this entry.

        @param n: the number of decompressed bytes wanted.  C{None} or a
            negative value means "everything that is left in this entry".

        @return: the decompressed bytes; an empty string once the entry has
            been fully read or closed.  The final chunk of an entry may be
            shorter than C{n}.
        """
        if self.finished:
            return b""
        if n is None or n < 0:
            # Negative sizes are treated as read-all; left alone they would
            # fall into the slicing branch below and return a mangled piece
            # of the buffer.
            compressed = self.chunkingZipFile.fp.read(
                self.length - self.readBytes)
            self.readBytes += len(compressed)
            result = [self.buffer,
                      self.decomp.decompress(compressed),
                      # Feed one dummy byte before flushing, as the original
                      # code does, to push out any pending output.
                      self.decomp.decompress(b"Z"),
                      self.decomp.flush()]
            self.buffer = b""
            self.finished = 1
            result = b"".join(result)
            self.returnedBytes += len(result)
            return result
        else:
            while len(self.buffer) < n:
                data = self.chunkingZipFile.fp.read(
                    min(n, 1024, self.length - self.readBytes))
                self.readBytes += len(data)
                if not data:
                    # No compressed input left: drain the decompressor and
                    # return whatever remains, even if shorter than n.
                    result = (self.buffer
                              + self.decomp.decompress(b"Z")
                              + self.decomp.flush())
                    self.finished = 1
                    self.buffer = b""
                    self.returnedBytes += len(result)
                    return result
                else:
                    self.buffer += self.decomp.decompress(data)
            result = self.buffer[:n]
            self.buffer = self.buffer[n:]
            self.returnedBytes += len(result)
            return result


# Bit mask applied to ZipInfo.external_attr by unzipIterChunky to decide
# whether an archive entry is a directory.  (0x10 is presumably the MS-DOS
# directory attribute flag -- confirm against the zip format specification.)
DIR_BIT = 16


def countZipFileChunks(filename, chunksize):
    """
    Predict the number of chunks that will be extracted from the entire
    zipfile, given chunksize blocks.

    @param filename: the zip archive to inspect, as accepted by
        L{ChunkingZipFile}.
    @param chunksize: the number of uncompressed bytes per chunk.

    @return: the sum of L{countFileChunks} over every entry in the archive.
    @rtype: C{int}
    """
    zf = ChunkingZipFile(filename)
    try:
        return sum(countFileChunks(info, chunksize)
                   for info in zf.infolist())
    finally:
        # Release the archive handle instead of leaving it to the garbage
        # collector.
        zf.close()


def countFileChunks(zipinfo, chunksize):
    """
    Work out how many chunks of C{chunksize} bytes one archive entry needs.

    @param zipinfo: a C{zipfile.ZipInfo} instance describing the entry; only
    its C{file_size} attribute is consulted.

    @param chunksize: the number of uncompressed bytes in a full chunk.

    @return: C{file_size} divided by C{chunksize}, rounded up, and never
    less than one (an empty file still counts as a single chunk).
    @rtype: C{int}
    """
    wholeChunks, leftover = divmod(zipinfo.file_size, chunksize)
    # A partial trailing chunk still has to be extracted.
    total = wholeChunks + 1 if leftover > 0 else wholeChunks
    if total:
        return total
    return 1


def unzipIterChunky(filename, directory='.', overwrite=0,
                    chunksize=4096):
    """
    Return a generator for the zipfile.  This implementation will yield after
    every chunksize uncompressed bytes, or at the end of a file, whichever
    comes first.

    The value it yields is the number of chunks left to unzip.

    @param filename: the zip archive to extract, as accepted by
        L{ChunkingZipFile}.
    @param directory: the directory to extract into; created if missing.
    @param overwrite: if true, existing files are replaced; otherwise they
        are skipped (their chunks are still subtracted from the count).
        Directories are never affected by this flag.
    @param chunksize: the number of uncompressed bytes to extract between
        yields.

    @raise zipfile.BadZipfile: if an entry yields fewer bytes than its
        recorded uncompressed size (instead of looping forever).
    """
    czf = ChunkingZipFile(filename, 'r')
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
        infos = czf.infolist()
        # Count from the archive that is already open rather than opening
        # and parsing it a second time.
        remaining = sum(countFileChunks(info, chunksize) for info in infos)

        for info in infos:
            entry = info.filename
            isdir = info.external_attr & DIR_BIT
            # NOTE(review): entry comes straight from the archive and is not
            # validated; a name that is absolute or contains ".." resolves
            # outside of directory.  Only extract trusted archives, or
            # validate names before calling this.
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
                remaining -= 1
                yield remaining
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    # try/finally so the output file is closed even if
                    # reading fails or the generator is abandoned mid-file.
                    try:
                        fp = czf.readfile(entry)
                        if info.file_size == 0:
                            remaining -= 1
                            yield remaining
                        while fp.tell() < info.file_size:
                            hunk = fp.read(chunksize)
                            if not hunk:
                                # The entry ran dry before reaching its
                                # recorded size; tell() can never advance,
                                # so stop rather than spin forever.
                                raise zipfile.BadZipfile(
                                    "Entry %s ended before %d bytes were "
                                    "read" % (entry, info.file_size))
                            outfile.write(hunk)
                            remaining -= 1
                            yield remaining
                    finally:
                        outfile.close()
                else:
                    remaining -= countFileChunks(info, chunksize)
                    yield remaining
    finally:
        czf.close()
Open Media Library Platform 2013-10-11 17:28:32 +00:00			`# -- test-case-name: twisted.python.test.test_zipstream --`
			`# Copyright (c) Twisted Matrix Laboratories.`
			`# See LICENSE for details.`

			`"""`
			`An incremental approach to unzipping files. This allows you to unzip a little`
			`bit of a file at a time, which means you can report progress as a file unzips.`
			`"""`

			`import zipfile`
			`import os.path`
			`import zlib`
			`import struct`


			`_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)`

			`class ChunkingZipFile(zipfile.ZipFile):`
			`"""`
			`A C{ZipFile} object which, with L{readfile}, also gives you access to a`
			`file-like object for each entry.`
			`"""`

			`def readfile(self, name):`
			`"""`
			`Return file-like object for name.`
			`"""`
			`if self.mode not in ("r", "a"):`
			`raise RuntimeError('read() requires mode "r" or "a"')`
			`if not self.fp:`
			`raise RuntimeError(`
			`"Attempt to read ZIP archive that was already closed")`
			`zinfo = self.getinfo(name)`

			`self.fp.seek(zinfo.header_offset, 0)`

			`fheader = self.fp.read(_fileHeaderSize)`
			`if fheader[0:4] != zipfile.stringFileHeader:`
			`raise zipfile.BadZipfile("Bad magic number for file header")`

			`fheader = struct.unpack(zipfile.structFileHeader, fheader)`
			`fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])`

			`if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:`
			`self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])`

			`if fname != zinfo.orig_filename:`
			`raise zipfile.BadZipfile(`
			`'File name in directory "%s" and header "%s" differ.' % (`
			`zinfo.orig_filename, fname))`

			`if zinfo.compress_type == zipfile.ZIP_STORED:`
			`return ZipFileEntry(self, zinfo.compress_size)`
			`elif zinfo.compress_type == zipfile.ZIP_DEFLATED:`
			`return DeflatedZipFileEntry(self, zinfo.compress_size)`
			`else:`
			`raise zipfile.BadZipfile(`
			`"Unsupported compression method %d for file %s" %`
			`(zinfo.compress_type, name))`



			`class _FileEntry(object):`
			`"""`
			`Abstract superclass of both compressed and uncompressed variants of`
			`file-like objects within a zip archive.`

			`@ivar chunkingZipFile: a chunking zip file.`
			`@type chunkingZipFile: L{ChunkingZipFile}`

			`@ivar length: The number of bytes within the zip file that represent this`
			`file. (This is the size on disk, not the number of decompressed bytes`
			`which will result from reading it.)`

			`@ivar fp: the underlying file object (that contains pkzip data). Do not`
			`touch this, please. It will quite likely move or go away.`

			`@ivar closed: File-like 'closed' attribute; True before this file has been`
			`closed, False after.`
			`@type closed: C{bool}`

			`@ivar finished: An older, broken synonym for 'closed'. Do not touch this,`
			`please.`
			`@type finished: C{int}`
			`"""`
			`def __init__(self, chunkingZipFile, length):`
			`"""`
			`Create a L{_FileEntry} from a L{ChunkingZipFile}.`
			`"""`
			`self.chunkingZipFile = chunkingZipFile`
			`self.fp = self.chunkingZipFile.fp`
			`self.length = length`
			`self.finished = 0`
			`self.closed = False`


			`def isatty(self):`
			`"""`
			`Returns false because zip files should not be ttys`
			`"""`
			`return False`


			`def close(self):`
			`"""`
			`Close self (file-like object)`
			`"""`
			`self.closed = True`
			`self.finished = 1`
			`del self.fp`


			`def readline(self):`
			`"""`
			`Read a line.`
			`"""`
			`bytes = ""`
			`for byte in iter(lambda : self.read(1), ""):`
			`bytes += byte`
			`if byte == "\n":`
			`break`
			`return bytes`


			`def next(self):`
			`"""`
			`Implement next as file does (like readline, except raises StopIteration`
			`at EOF)`
			`"""`
			`nextline = self.readline()`
			`if nextline:`
			`return nextline`
			`raise StopIteration()`


			`def readlines(self):`
			`"""`
			`Returns a list of all the lines`
			`"""`
			`return list(self)`


			`def xreadlines(self):`
			`"""`
			`Returns an iterator (so self)`
			`"""`
			`return self`


			`def __iter__(self):`
			`"""`
			`Returns an iterator (so self)`
			`"""`
			`return self`



			`class ZipFileEntry(_FileEntry):`
			`"""`
			`File-like object used to read an uncompressed entry in a ZipFile`
			`"""`

			`def __init__(self, chunkingZipFile, length):`
			`_FileEntry.__init__(self, chunkingZipFile, length)`
			`self.readBytes = 0`


			`def tell(self):`
			`return self.readBytes`


			`def read(self, n=None):`
			`if n is None:`
			`n = self.length - self.readBytes`
			`if n == 0 or self.finished:`
			`return ''`
			`data = self.chunkingZipFile.fp.read(`
			`min(n, self.length - self.readBytes))`
			`self.readBytes += len(data)`
			`if self.readBytes == self.length or len(data) < n:`
			`self.finished = 1`
			`return data`



			`class DeflatedZipFileEntry(_FileEntry):`
			`"""`
			`File-like object used to read a deflated entry in a ZipFile`
			`"""`

			`def __init__(self, chunkingZipFile, length):`
			`_FileEntry.__init__(self, chunkingZipFile, length)`
			`self.returnedBytes = 0`
			`self.readBytes = 0`
			`self.decomp = zlib.decompressobj(-15)`
			`self.buffer = ""`


			`def tell(self):`
			`return self.returnedBytes`


			`def read(self, n=None):`
			`if self.finished:`
			`return ""`
			`if n is None:`
			`result = [self.buffer,]`
			`result.append(`
			`self.decomp.decompress(`
			`self.chunkingZipFile.fp.read(`
			`self.length - self.readBytes)))`
			`result.append(self.decomp.decompress("Z"))`
			`result.append(self.decomp.flush())`
			`self.buffer = ""`
			`self.finished = 1`
			`result = "".join(result)`
			`self.returnedBytes += len(result)`
			`return result`
			`else:`
			`while len(self.buffer) < n:`
			`data = self.chunkingZipFile.fp.read(`
			`min(n, 1024, self.length - self.readBytes))`
			`self.readBytes += len(data)`
			`if not data:`
			`result = (self.buffer`
			`+ self.decomp.decompress("Z")`
			`+ self.decomp.flush())`
			`self.finished = 1`
			`self.buffer = ""`
			`self.returnedBytes += len(result)`
			`return result`
			`else:`
			`self.buffer += self.decomp.decompress(data)`
			`result = self.buffer[:n]`
			`self.buffer = self.buffer[n:]`
			`self.returnedBytes += len(result)`
			`return result`



			`DIR_BIT = 16`


			`def countZipFileChunks(filename, chunksize):`
			`"""`
			`Predict the number of chunks that will be extracted from the entire`
			`zipfile, given chunksize blocks.`
			`"""`
			`totalchunks = 0`
			`zf = ChunkingZipFile(filename)`
			`for info in zf.infolist():`
			`totalchunks += countFileChunks(info, chunksize)`
			`return totalchunks`


			`def countFileChunks(zipinfo, chunksize):`
			`"""`
			`Count the number of chunks that will result from the given C{ZipInfo}.`

			`@param zipinfo: a C{zipfile.ZipInfo} instance describing an entry in a zip`
			`archive to be counted.`

			`@return: the number of chunks present in the zip file. (Even an empty file`
			`counts as one chunk.)`
			`@rtype: C{int}`
			`"""`
			`count, extra = divmod(zipinfo.file_size, chunksize)`
			`if extra > 0:`
			`count += 1`
			`return count or 1`



			`def unzipIterChunky(filename, directory='.', overwrite=0,`
			`chunksize=4096):`
			`"""`
			`Return a generator for the zipfile. This implementation will yield after`
			`every chunksize uncompressed bytes, or at the end of a file, whichever`
			`comes first.`

			`The value it yields is the number of chunks left to unzip.`
			`"""`
			`czf = ChunkingZipFile(filename, 'r')`
			`if not os.path.exists(directory):`
			`os.makedirs(directory)`
			`remaining = countZipFileChunks(filename, chunksize)`
			`names = czf.namelist()`
			`infos = czf.infolist()`

			`for entry, info in zip(names, infos):`
			`isdir = info.external_attr & DIR_BIT`
			`f = os.path.join(directory, entry)`
			`if isdir:`
			`# overwrite flag only applies to files`
			`if not os.path.exists(f):`
			`os.makedirs(f)`
			`remaining -= 1`
			`yield remaining`
			`else:`
			`# create the directory the file will be in first,`
			`# since we can't guarantee it exists`
			`fdir = os.path.split(f)[0]`
			`if not os.path.exists(fdir):`
			`os.makedirs(fdir)`
			`if overwrite or not os.path.exists(f):`
			`outfile = file(f, 'wb')`
			`fp = czf.readfile(entry)`
			`if info.file_size == 0:`
			`remaining -= 1`
			`yield remaining`
			`while fp.tell() < info.file_size:`
			`hunk = fp.read(chunksize)`
			`outfile.write(hunk)`
			`remaining -= 1`
			`yield remaining`
			`outfile.close()`
			`else:`
			`remaining -= countFileChunks(info, chunksize)`
			`yield remaining`