Update PyPDF2 from 1.23 to 1.25.1

2016-02-08 11:50:06 +05:30 · 2016-02-08 11:50:06 +05:30 · 66205d529e
commit 66205d529e
parent b8b1fe89bd
19 changed files with 626 additions and 315 deletions
--- a/Shared/lib/python3.4/site-packages/PyPDF2/_version.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/_version.py
@ -1,2 +1 @@
-__version__ = '1.23'
-
+__version__ = '1.25.1'
--- a/Shared/lib/python3.4/site-packages/PyPDF2/filters.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/filters.py
@ -40,28 +40,35 @@ if version_info < ( 3, 0 ):
    from cStringIO import StringIO
 else:
    from io import StringIO
+    import struct

 try:
    import zlib
+
    def decompress(data):
        return zlib.decompress(data)
+
    def compress(data):
        return zlib.compress(data)
+
 except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array
+
    def _string_to_bytearr(buf):
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval
+
    def _bytearr_to_string(bytes):
        retval = ""
        for i in range(bytes.Length):
            retval += chr(bytes[i])
        return retval
+
    def _read_bytes(stream):
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
@ -74,6 +81,7 @@ except ImportError:
        retval = ms.ToArray()
        ms.Close()
        return retval
+
    def decompress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
@ -84,6 +92,7 @@ except ImportError:
        retval = _bytearr_to_string(bytes)
        gz.Close()
        return retval
+
    def compress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
@ -106,7 +115,7 @@ class FlateDecode(object):
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass    # usually an array with a null object was read
-            
+
        # predictor 1 == no predictor
        if predictor != 1:
            columns = decodeParms["/Columns"]
@ -144,6 +153,7 @@ class FlateDecode(object):
        return compress(data)
    encode = staticmethod(encode)

+
 class ASCIIHexDecode(object):
    def decode(data, decodeParms=None):
        retval = ""
@ -165,6 +175,7 @@ class ASCIIHexDecode(object):
        return retval
    decode = staticmethod(decode)

+
 class LZWDecode(object):
    """Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
@ -184,7 +195,6 @@ class LZWDecode(object):
        def resetDict(self):
            self.dictlen=258
            self.bitspercode=9
-                

        def nextCode(self):
            fillbits=self.bitspercode
@ -196,8 +206,8 @@ class LZWDecode(object):
                bitsfromhere=8-self.bitpos
                if bitsfromhere>fillbits:
                    bitsfromhere=fillbits
-                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) & 
-                           (0xff >> (8-bitsfromhere))) << 
+                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
+                           (0xff >> (8-bitsfromhere))) <<
                          (fillbits-bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
@ -235,70 +245,93 @@ class LZWDecode(object):
                        baos+=p
                        self.dict[self.dictlen] = p;
                        self.dictlen+=1
-                    if (self.dictlen >= (1 << self.bitspercode) - 1 and 
+                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                        self.bitspercode < 12):
                        self.bitspercode+=1
            return baos

-
-    
    @staticmethod
    def decode(data,decodeParams=None):
        return LZWDecode.decoder(data).decode()

+
 class ASCII85Decode(object):
    def decode(data, decodeParms=None):
-        retval = ""
-        group = []
-        x = 0
-        hitEod = False
-        # remove all whitespace from data
-        data = [y for y in data if not (y in ' \n\r\t')]
-        while not hitEod:
-            c = data[x]
-            if len(retval) == 0 and c == "<" and data[x+1] == "~":
-                x += 2
-                continue
-            #elif c.isspace():
-            #    x += 1
-            #    continue
-            elif c == 'z':
-                assert len(group) == 0
-                retval += '\x00\x00\x00\x00'
-                x += 1
-                continue
-            elif c == "~" and data[x+1] == ">":
-                if len(group) != 0:
-                    # cannot have a final group of just 1 char
-                    assert len(group) > 1
-                    cnt = len(group) - 1
-                    group += [ 85, 85, 85 ]
-                    hitEod = cnt
+        if version_info < ( 3, 0 ):
+            retval = ""
+            group = []
+            x = 0
+            hitEod = False
+            # remove all whitespace from data
+            data = [y for y in data if not (y in ' \n\r\t')]
+            while not hitEod:
+                c = data[x]
+                if len(retval) == 0 and c == "<" and data[x+1] == "~":
+                    x += 2
+                    continue
+                #elif c.isspace():
+                #    x += 1
+                #    continue
+                elif c == 'z':
+                    assert len(group) == 0
+                    retval += '\x00\x00\x00\x00'
+                    x += 1
+                    continue
+                elif c == "~" and data[x+1] == ">":
+                    if len(group) != 0:
+                        # cannot have a final group of just 1 char
+                        assert len(group) > 1
+                        cnt = len(group) - 1
+                        group += [ 85, 85, 85 ]
+                        hitEod = cnt
+                    else:
+                        break
                else:
+                    c = ord(c) - 33
+                    assert c >= 0 and c < 85
+                    group += [ c ]
+                if len(group) >= 5:
+                    b = group[0] * (85**4) + \
+                        group[1] * (85**3) + \
+                        group[2] * (85**2) + \
+                        group[3] * 85 + \
+                        group[4]
+                    assert b < (2**32 - 1)
+                    c4 = chr((b >> 0) % 256)
+                    c3 = chr((b >> 8) % 256)
+                    c2 = chr((b >> 16) % 256)
+                    c1 = chr(b >> 24)
+                    retval += (c1 + c2 + c3 + c4)
+                    if hitEod:
+                        retval = retval[:-4+hitEod]
+                    group = []
+                x += 1
+            return retval
+        else:
+            if isinstance(data, str):
+                data = data.encode('ascii')
+            n = b = 0
+            out = bytearray()
+            for c in data:
+                if ord('!') <= c and c <= ord('u'):
+                    n += 1
+                    b = b*85+(c-33)
+                    if n == 5:
+                        out += struct.pack(b'>L',b)
+                        n = b = 0
+                elif c == ord('z'):
+                    assert n == 0
+                    out += b'\0\0\0\0'
+                elif c == ord('~'):
+                    if n:
+                        for _ in range(5-n):
+                            b = b*85+84
+                        out += struct.pack(b'>L',b)[:n-1]
                    break
-            else:
-                c = ord(c) - 33
-                assert c >= 0 and c < 85
-                group += [ c ]
-            if len(group) >= 5:
-                b = group[0] * (85**4) + \
-                    group[1] * (85**3) + \
-                    group[2] * (85**2) + \
-                    group[3] * 85 + \
-                    group[4]
-                assert b < (2**32 - 1)
-                c4 = chr((b >> 0) % 256)
-                c3 = chr((b >> 8) % 256)
-                c2 = chr((b >> 16) % 256)
-                c1 = chr(b >> 24)
-                retval += (c1 + c2 + c3 + c4)
-                if hitEod:
-                    retval = retval[:-4+hitEod]
-                group = []
-            x += 1
-        return retval
+            return bytes(out)
    decode = staticmethod(decode)

+
 def decodeStreamData(stream):
    from .generic import NameObject
    filters = stream.get("/Filter", ())
@ -306,22 +339,24 @@ def decodeStreamData(stream):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
-    for filterType in filters:
-        if filterType == "/FlateDecode":
-            data = FlateDecode.decode(data, stream.get("/DecodeParms"))
-        elif filterType == "/ASCIIHexDecode":
-            data = ASCIIHexDecode.decode(data)
-        elif filterType == "/LZWDecode":
-            data = LZWDecode.decode(data, stream.get("/DecodeParms"))
-        elif filterType == "/ASCII85Decode":
-            data = ASCII85Decode.decode(data)
-        elif filterType == "/Crypt":
-            decodeParams = stream.get("/DecodeParams", {})
-            if "/Name" not in decodeParams and "/Type" not in decodeParams:
-                pass
+    # If there is no data to decode, skip the filter loop and return it unchanged.
+    if data:
+        for filterType in filters:
+            if filterType == "/FlateDecode" or filterType == "/Fl":
+                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
+                data = ASCIIHexDecode.decode(data)
+            elif filterType == "/LZWDecode" or filterType == "/LZW":
+                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCII85Decode" or filterType == "/A85":
+                data = ASCII85Decode.decode(data)
+            elif filterType == "/Crypt":
+                decodeParams = stream.get("/DecodeParams", {})
+                if "/Name" not in decodeParams and "/Type" not in decodeParams:
+                    pass
+                else:
+                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
-                raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
-        else:
-            # unsupported filter
-            raise NotImplementedError("unsupported filter %s" % filterType)
+                # unsupported filter
+                raise NotImplementedError("unsupported filter %s" % filterType)
    return data
--- a/Shared/lib/python3.4/site-packages/PyPDF2/generic.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/generic.py
@ -43,11 +43,14 @@ from . import filters
 from . import utils
 import decimal
 import codecs
+import sys
 #import debugging

 ObjectPrefix = b_('/<[tf(n%')
 NumberSigns = b_('+-')
 IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
+
+
 def readObject(stream, pdf):
    tok = stream.read(1)
    stream.seek(-1, 1) # reset to start
@ -94,6 +97,7 @@ def readObject(stream, pdf):
        else:
            return NumberObject.readFromStream(stream)

+
 class PdfObject(object):
    def getObject(self):
        """Resolves indirect references."""
@ -225,6 +229,7 @@ class FloatObject(decimal.Decimal, PdfObject):
            return decimal.Decimal.__new__(cls, utils.str_(value), context)
        except:
            return decimal.Decimal.__new__(cls, str(value))
+
    def __repr__(self):
        if self == self.to_integral():
            return str(self.quantize(decimal.Decimal(1)))
@ -244,7 +249,11 @@ class NumberObject(int, PdfObject):
    ByteDot = b_(".")

    def __new__(cls, value):
-        return int.__new__(cls, value)
+        val = int(value)
+        try:
+            return int.__new__(cls, val)
+        except OverflowError:
+            return int.__new__(cls, 0)

    def as_numeric(self):
        return int(b_(repr(self)))
@ -253,16 +262,7 @@ class NumberObject(int, PdfObject):
        stream.write(b_(repr(self)))

    def readFromStream(stream):
-        num = b_("")
-        while True:
-            tok = stream.read(16)
-            m = NumberObject.NumberPattern.search(tok)
-            if m is not None:
-                stream.seek(m.start() - len(tok), 1)
-                num += tok[:m.start()]
-                break
-
-            num += tok
+        num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
        if num.find(NumberObject.ByteDot) != -1:
            return FloatObject(num)
        else:
@ -345,13 +345,18 @@ def readStringFromStream(stream):
                tok = b_("\b")
            elif tok == b_("f"):
                tok = b_("\f")
+            elif tok == b_("c"):
+                tok = b_("\c")
            elif tok == b_("("):
                tok = b_("(")
            elif tok == b_(")"):
                tok = b_(")")
+            elif tok == b_("/"):
+                tok = b_("/")
            elif tok == b_("\\"):
                tok = b_("\\")
-            elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")):
+            elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), 
+                    b_("]"), b_("#"),  b_("_"), b_("&"), b_('$')):
                # odd/unnessecary escape sequences we have encountered
                tok = b_(tok)
            elif tok.isdigit():
@ -378,7 +383,7 @@ def readStringFromStream(stream):
                # line break was escaped:
                tok = b_('')
            else:
-                raise utils.PdfReadError("Unexpected escaped string")
+                raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
        txt += tok
    return createStringObject(txt)

@ -456,7 +461,7 @@ class TextStringObject(utils.string_type, PdfObject):


 class NameObject(str, PdfObject):
-    delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
+    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
    surfix = b_("/")

    def writeToStream(self, stream, encryption_key):
@ -468,11 +473,12 @@ class NameObject(str, PdfObject):
        name = stream.read(1)
        if name != NameObject.surfix:
            raise utils.PdfReadError("name read error")
-        name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
+        name += utils.readUntilRegex(stream, NameObject.delimiterPattern, 
+            ignore_eof=True)
        if debug: print(name)
        try:
            return NameObject(name.decode('utf-8'))
-        except UnicodeDecodeError as e:
+        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            if not pdf.strict:
@ -630,6 +636,7 @@ class DictionaryObject(dict, PdfObject):
            return retval
    readFromStream = staticmethod(readFromStream)

+
 class TreeObject(DictionaryObject):
    def __init__(self):
        DictionaryObject.__init__(self)
@ -726,7 +733,6 @@ class TreeObject(DictionaryObject):
                found = True
                break

-
            prevRef = curRef
            prev = cur
            if NameObject('/Next') in cur:
@ -938,6 +944,7 @@ class RectangleObject(ArrayObject):
    in (x,y) form.
    """

+
 class Field(TreeObject):
    """
    A class representing a field dictionary. This class is accessed through
@ -1009,6 +1016,7 @@ class Field(TreeObject):
    See Section 8.5.2 of the PDF 1.7 reference.
    """

+
 class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.
@ -1157,6 +1165,7 @@ def encode_pdfdocencoding(unicode_string):
                    "does not exist in translation table")
    return retval

+
 def decode_pdfdocencoding(byte_array):
    retval = u_('')
    for b in byte_array:
@ -1211,4 +1220,3 @@ for i in range(256):
        continue
    assert char not in _pdfDocEncoding_rev
    _pdfDocEncoding_rev[char] = i
-
--- a/Shared/lib/python3.4/site-packages/PyPDF2/merger.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/merger.py
@ -28,7 +28,7 @@
 # POSSIBILITY OF SUCH DAMAGE.

 from .generic import *
-from .utils import string_type
+from .utils import isString, str_
 from .pdf import PdfFileReader, PdfFileWriter
 from .pagerange import PageRange
 from sys import version_info
@ -40,6 +40,7 @@ else:
    from io import FileIO as file
    StreamIO = BytesIO

+
 class _MergedPage(object):
    """
    _MergedPage is used internally by PdfFileMerger to collect necessary
@ -50,13 +51,14 @@ class _MergedPage(object):
        self.pagedata = pagedata
        self.out_pagedata = None
        self.id = id
-        
+
+
 class PdfFileMerger(object):
    """
    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
    into a single PDF. It can concatenate, slice, insert, or any combination
    of the above.
-    
+
    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
    and :meth:`write()<write>` for usage information.

@ -64,7 +66,7 @@ class PdfFileMerger(object):
            problems and also causes some correctable problems to be fatal.
            Defaults to ``True``.
    """
-    
+
    def __init__(self, strict=True):
        self.inputs = []
        self.pages = []
@ -73,7 +75,7 @@ class PdfFileMerger(object):
        self.named_dests = []
        self.id_count = 0
        self.strict = strict
-        
+
    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Merges the pages from the given file into the output file at the
@ -85,29 +87,30 @@ class PdfFileMerger(object):
        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
-        
+
        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.
-        
+
        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """
-        
+
        # This parameter is passed to self.inputs.append and means
        # that the stream used was created in this method.
        my_file = False
-        
+
        # If the fileobj parameter is a string, assume it is a path
        # and create a file object at that location. If it is a file,
-        # copy the file's contents into a BytesIO (or StreamIO) stream object; if 
-        # it is a PdfFileReader, copy that reader's stream into a 
+        # copy the file's contents into a BytesIO (or StreamIO) stream object; if
+        # it is a PdfFileReader, copy that reader's stream into a
        # BytesIO (or StreamIO) stream.
        # If fileobj is none of the above types, it is not modified
-        if type(fileobj) == string_type:
+        decryption_key = None
+        if isString(fileobj):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif isinstance(fileobj, file):
@ -116,17 +119,21 @@ class PdfFileMerger(object):
            fileobj = StreamIO(filecontent)
            my_file = True
        elif isinstance(fileobj, PdfFileReader):
-            orig_tell = fileobj.stream.tell()   
+            orig_tell = fileobj.stream.tell()
            fileobj.stream.seek(0)
            filecontent = StreamIO(fileobj.stream.read())
            fileobj.stream.seek(orig_tell) # reset the stream to its original location
            fileobj = filecontent
+            if hasattr(fileobj, '_decryption_key'):
+                decryption_key = fileobj._decryption_key
            my_file = True
-            
+
        # Create a new PdfFileReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        pdfr = PdfFileReader(fileobj, strict=self.strict)
-        
+        if decryption_key is not None:
+            pdfr._decryption_key = decryption_key
+
        # Find the range of pages to merge.
        if pages == None:
            pages = (0, pdfr.getNumPages())
@ -134,47 +141,45 @@ class PdfFileMerger(object):
            pages = pages.indices(pdfr.getNumPages())
        elif not isinstance(pages, tuple):
            raise TypeError('"pages" must be a tuple of (start, stop[, step])')
-        
+
        srcpages = []
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
-        
+
        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)
-        
+
        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline
-        
+
        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests
-        
+
        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)
-            
+
            id = self.id_count
            self.id_count += 1
-            
+
            mp = _MergedPage(pg, pdfr, id)
-            
+
            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)
-            
-        
+
        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages
-        
+
        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))
-        
-        
+
    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
@ -183,7 +188,7 @@ class PdfFileMerger(object):
        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
-        
+
        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

@ -194,10 +199,9 @@ class PdfFileMerger(object):
        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """
-        
+
        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
-        
-    
+
    def write(self, fileobj):
        """
        Writes all data that has been merged to the given output file.
@ -206,11 +210,10 @@ class PdfFileMerger(object):
            file-like object.
        """
        my_file = False
-        if type(fileobj) in (str, str):
+        if isString(fileobj):
            fileobj = file(fileobj, 'wb')
            my_file = True

-
        # Add pages to the PdfFileWriter
        # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
        for page in self.pages:
@ -222,15 +225,13 @@ class PdfFileMerger(object):
        # Once all pages are added, create bookmarks to point at those pages
        self._write_dests()
        self._write_bookmarks()
-        
-        # Write the output to the file   
+
+        # Write the output to the file
        self.output.write(fileobj)
-        
+
        if my_file:
            fileobj.close()

-
-        
    def close(self):
        """
        Shuts all file descriptors (input and output) and clears all memory
@ -240,7 +241,7 @@ class PdfFileMerger(object):
        for fo, pdfr, mine in self.inputs:
            if mine:
                fo.close()
-        
+
        self.inputs = []
        self.output = None

@ -253,7 +254,7 @@ class PdfFileMerger(object):
            Example: ``{u'/Title': u'My title'}``
        """
        self.output.addMetadata(infos)
-    
+
    def setPageLayout(self, layout):
        """
        Set the page layout
@ -289,7 +290,7 @@ class PdfFileMerger(object):

    def _trim_dests(self, pdf, dests, pages):
        """
-        Removes any named destinations that are not a part of the specified 
+        Removes any named destinations that are not a part of the specified
        page set.
        """
        new_dests = []
@ -298,14 +299,14 @@ class PdfFileMerger(object):
            for j in range(*pages):
                if pdf.getPage(j).getObject() == o['/Page'].getObject():
                    o[NameObject('/Page')] = o['/Page'].getObject()
-                    assert str(k) == str(o['/Title'])
+                    assert str_(k) == str_(o['/Title'])
                    new_dests.append(o)
                    break
        return new_dests
-    
+
    def _trim_outline(self, pdf, outline, pages):
        """
-        Removes any outline/bookmark entries that are not a part of the 
+        Removes any outline/bookmark entries that are not a part of the
        specified page set.
        """
        new_outline = []
@ -326,10 +327,10 @@ class PdfFileMerger(object):
                        prev_header_added = True
                        break
        return new_outline
-   
+
    def _write_dests(self):
        dests = self.named_dests
-        
+
        for v in dests:
            pageno = None
            pdf = None
@ -342,19 +343,18 @@ class PdfFileMerger(object):
                        break
            if pageno != None:
                self.output.addNamedDestinationObject(v)
- 
+
    def _write_bookmarks(self, bookmarks=None, parent=None):
-        
+
        if bookmarks == None:
            bookmarks = self.bookmarks
-        

        last_added = None
        for b in bookmarks:
            if isinstance(b, list):
                self._write_bookmarks(b, last_added)
                continue
-                
+
            pageno = None
            pdf = None
            if '/Page' in b:
@ -410,31 +410,31 @@ class PdfFileMerger(object):
                            del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']

                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
-                       
+
                        pageno = i
                        pdf = p.src
                        break
            if pageno != None:
                del b['/Page'], b['/Type']
-                last_added = self.output.addBookmarkDict(b, parent)    
+                last_added = self.output.addBookmarkDict(b, parent)

    def _associate_dests_to_pages(self, pages):
        for nd in self.named_dests:
            pageno = None
            np = nd['/Page']
-            
+
            if isinstance(np, NumberObject):
                continue
-            
+
            for p in pages:
                if np.getObject() == p.pagedata.getObject():
                    pageno = p.id
-            
+
            if pageno != None:
                nd[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
-    
+
    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
        if bookmarks == None:
            bookmarks = self.bookmarks
@ -443,35 +443,35 @@ class PdfFileMerger(object):
            if isinstance(b, list):
                self._associate_bookmarks_to_pages(pages, b)
                continue
-                
+
            pageno = None
            bp = b['/Page']
-            
+
            if isinstance(bp, NumberObject):
                continue
-                
+
            for p in pages:
                if bp.getObject() == p.pagedata.getObject():
                    pageno = p.id
-            
+
            if pageno != None:
                b[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
-                
+
    def findBookmark(self, bookmark, root=None):
-    	if root == None:
-    		root = self.bookmarks
-    	
-    	for i, b in enumerate(root):
-    		if isinstance(b, list):
-    			res = self.findBookmark(bookmark, b)
-    			if res:
-    				return [i] + res
-    		elif b == bookmark or b['/Title'] == bookmark:
-    			return [i]
-    
-    	return None
+        if root == None:
+            root = self.bookmarks
+
+        for i, b in enumerate(root):
+            if isinstance(b, list):
+                res = self.findBookmark(bookmark, b)
+                if res:
+                    return [i] + res
+            elif b == bookmark or b['/Title'] == bookmark:
+                return [i]
+
+        return None

    def addBookmark(self, title, pagenum, parent=None):
        """
@ -483,28 +483,27 @@ class PdfFileMerger(object):
            bookmarks.
        """
        if parent == None:
-        	iloc = [len(self.bookmarks)-1]
+            iloc = [len(self.bookmarks)-1]
        elif isinstance(parent, list):
-        	iloc = parent
+            iloc = parent
        else:
-        	iloc = self.findBookmark(parent)
-        
+            iloc = self.findBookmark(parent)
+
        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
-        
+
        if parent == None:
-        	self.bookmarks.append(dest)
+            self.bookmarks.append(dest)
        else:
-        	bmparent = self.bookmarks
-        	for i in iloc[:-1]:
-        		bmparent = bmparent[i]
-        	npos = iloc[-1]+1
-        	if npos < len(bmparent) and isinstance(bmparent[npos], list):
-        		bmparent[npos].append(dest)
-        	else:
-        		bmparent.insert(npos, [dest])
+            bmparent = self.bookmarks
+            for i in iloc[:-1]:
+                bmparent = bmparent[i]
+            npos = iloc[-1]+1
+            if npos < len(bmparent) and isinstance(bmparent[npos], list):
+                bmparent[npos].append(dest)
+            else:
+                bmparent.insert(npos, [dest])
        return dest
-        		
-        
+
    def addNamedDestination(self, title, pagenum):
        """
        Add a destination to the output.
@ -512,7 +511,7 @@ class PdfFileMerger(object):
        :param str title: Title to use
        :param int pagenum: Page number this destination points at.
        """
-        
+
        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
        self.named_dests.append(dest)

@ -523,12 +522,12 @@ class OutlinesObject(list):
        self.tree = tree
        self.pdf = pdf
        self.parent = parent
-    
+
    def remove(self, index):
        obj = self[index]
        del self[index]
        self.tree.removeChild(obj)
-        
+
    def add(self, title, pagenum):
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
        action = DictionaryObject()
@ -547,7 +546,7 @@ class OutlinesObject(list):
        self.pdf._addObject(bookmark)

        self.tree.addChild(bookmark)
-        
+
    def removeAll(self):
        for child in [x for x in self.tree.children()]:
            self.tree.removeChild(child)
--- a/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py
@ -8,7 +8,7 @@ see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
 """

 import re
-from .utils import Str
+from .utils import isString

 _INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
 PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
@ -32,11 +32,11 @@ PAGE_RANGE_HELP = """Remember, page indices start with zero.
            ::-1      all pages in reverse order.
 """

-        
+
 class PageRange(object):
-    """ 
+    """
    A slice-like representation of a range of page indices,
-        i.e. page numbers, only starting at zero. 
+        i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
@ -46,7 +46,7 @@ class PageRange(object):
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """
-    
+
    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
@ -67,8 +67,8 @@ class PageRange(object):
        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return
-        
-        m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
+
+        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
@ -77,25 +77,25 @@ class PageRange(object):
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
-            self._slice = slice(*[int(g) if g else None 
+            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])
-    
+
    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
-        
+
    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice)  or \
               isinstance(input, PageRange) or \
-               (isinstance(input, Str)
+               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice
-        
+
    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
@ -127,7 +127,7 @@ def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
-    First arg must be a filename; other ags are filenames, page-range 
+    First arg must be a filename; other ags are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
@ -146,7 +146,7 @@ def parse_filename_page_ranges(args):
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
-                    
+
            pdf_filename = arg
            did_page_range = False
    return pairs
--- a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py
@ -63,7 +63,7 @@ import warnings
 import codecs
 from .generic import *
 from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
-from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning
+from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning

 if version_info < ( 2, 4 ):
   from sets import ImmutableSet as frozenset
@ -74,6 +74,7 @@ else:
    from hashlib import md5
 import uuid

+
 class PdfFileWriter(object):
    """
    This class supports writing PDF files out, given pages produced by another
@ -228,6 +229,157 @@ class PdfFileWriter(object):
                NameObject("/OpenAction"): self._addObject(js)
                })

+    def addAttachment(self, fname, fdata):
+        """
+        Embed a file inside the PDF. Other entries of the catalog's /Names
+        dictionary and attachments added earlier by this method are kept.
+
+        :param str fname: The filename to display.
+        :param str fdata: The data in the file.
+
+        Reference:
+        https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
+        Section 7.11.3
+        """
+        # We need 3 entries:
+        # * The file's data
+        # * The /Filespec entry
+        # * The file's name, which goes in the Catalog
+        # The entry for the file. Sample:
+        #   8 0 obj
+        #   <<
+        #    /Length 12
+        #    /Type /EmbeddedFile
+        #   >>
+        #   stream
+        #   Hello world!
+        #   endstream
+        #   endobj
+        file_entry = DecodedStreamObject()
+        file_entry.setData(fdata)
+        file_entry.update({
+                NameObject("/Type"): NameObject("/EmbeddedFile")
+                })
+
+        # The Filespec entry. Sample:
+        #   7 0 obj
+        #   <<
+        #    /Type /Filespec
+        #    /F (hello.txt)
+        #    /EF << /F 8 0 R >>
+        #   >>
+        efEntry = DictionaryObject()
+        efEntry.update({ NameObject("/F"):file_entry })
+
+        filespec = DictionaryObject()
+        filespec.update({
+                NameObject("/Type"): NameObject("/Filespec"),
+                NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
+                NameObject("/EF"): efEntry
+                })
+
+        # Then register the Filespec under the root (catalog). Sample:
+        #   1 0 obj
+        #   <<
+        #    /Type /Catalog
+        #    /Outlines 2 0 R
+        #    /Pages 3 0 R
+        #    /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
+        #   >>
+        #   endobj
+        newEntry = [createStringObject(fname), filespec]
+
+        # Reuse the catalog's existing /Names dictionary (if it is one) so
+        # that whatever else is stored there is not thrown away.
+        namesDictionary = None
+        if "/Names" in self._root_object:
+            namesDictionary = self._root_object["/Names"]
+        if not isinstance(namesDictionary, DictionaryObject):
+            namesDictionary = DictionaryObject()
+            self._root_object.update({NameObject("/Names"): namesDictionary})
+
+        # Append to an existing /EmbeddedFiles /Names list so that earlier
+        # attachments survive. NOTE: entries are appended, not kept sorted.
+        embeddedFiles = None
+        if "/EmbeddedFiles" in namesDictionary:
+            embeddedFiles = namesDictionary["/EmbeddedFiles"]
+        if isinstance(embeddedFiles, DictionaryObject) and "/Names" in embeddedFiles:
+            embeddedFiles["/Names"].extend(newEntry)
+        else:
+            embeddedFiles = DictionaryObject()
+            embeddedFiles.update({NameObject("/Names"): ArrayObject(newEntry)})
+            namesDictionary.update({NameObject("/EmbeddedFiles"): embeddedFiles})
+
+    def appendPagesFromReader(self, reader, after_page_append=None):
+        """
+        Append every page of reader to this writer, in reader order. An
+        optional callback is invoked after each page has been appended.
+
+        :param reader: a PdfFileReader object whose pages are added to
+            this writer object.
+        :callback after_page_append (function): Callback function that is invoked after
+            each page is appended to the writer. Anything that is not callable
+            (including the default None) is ignored. Callback signature:
+
+            :param writer_pageref (PDF page reference): Reference to the page
+                appended to the writer, as returned by this writer's getPage().
+        """
+        # Both page counts are sampled once, before any page is appended
+        pages_to_copy = reader.getNumPages()
+        first_new_index = self.getNumPages()
+        notify = callable(after_page_append)
+
+        for offset in range(pages_to_copy):
+            self.addPage(reader.getPage(offset))
+            # The callback receives the page as read back from the writer
+            appended = self.getPage(first_new_index + offset)
+            if notify:
+                after_page_append(appended)
+
+    def updatePageFormFieldValues(self, page, fields):
+        '''
+        Update the form field values for a given page from a fields dictionary.
+        Copy field texts and values from fields to page. A page that has no
+        /Annots entry is left unchanged.
+
+        :param page: Page reference from PDF writer where the annotations
+            and field data will be updated.
+        :param fields: a Python dictionary of field names (/T) and text
+            values (/V)
+        '''
+        if '/Annots' not in page:
+            return  # no annotations to update; indexing would raise KeyError
+        for annot_ref in page['/Annots']:
+            writer_annot = annot_ref.getObject()
+            for field in fields:
+                if writer_annot.get('/T') == field:
+                    writer_annot[NameObject("/V")] = TextStringObject(fields[field])
+
+    def cloneReaderDocumentRoot(self, reader):
+        '''
+        Use the reader's document root (its trailer /Root entry) as this
+        writer's root object. The object is assigned as-is, not duplicated.
+        :param reader: PdfFileReader whose document root is adopted.
+        '''
+        reader_root = reader.trailer['/Root']
+        self._root_object = reader_root
+
+    def cloneDocumentFromReader(self, reader, after_page_append=None):
+        '''
+        Create a copy (clone) of a document from a PDF file reader: adopt the
+        reader's document root, then append all of the reader's pages.
+
+        :param reader: PDF file reader instance from which the clone
+            should be created.
+        :callback after_page_append (function): Callback function that is invoked after
+            each page is appended to the writer (passed through to
+            appendPagesFromReader). Callback signature:
+            :param writer_pageref (PDF page reference): Reference to the page just
+                appended to the document.
+        '''
+        self.cloneReaderDocumentRoot(reader)
+        self.appendPagesFromReader(reader, after_page_append)
+
    def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
        """
        Encrypt this PDF file with the PDF Standard encryption handler.
@ -516,7 +668,6 @@ class PdfFileWriter(object):

        return bookmarkRef

-
    def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
        """
        Add a bookmark to this PDF file.
@ -553,7 +704,6 @@ class PdfFileWriter(object):
        if parent == None:
            parent = outlineRef

-
        bookmark = TreeObject()

        bookmark.update({
@ -759,7 +909,7 @@ class PdfFileWriter(object):
        else:
            borderArr = [NumberObject(0)] * 3

-        if isinstance(rect, Str):
+        if isString(rect):
            rect = NameObject(rect)
        elif isinstance(rect, RectangleObject):
            pass
@ -871,6 +1021,7 @@ class PdfFileWriter(object):
    """Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
    and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""

+
 class PdfFileReader(object):
    """
    Initializes a PdfFileReader object.  This operation can take some time, as
@ -904,9 +1055,10 @@ class PdfFileReader(object):
        self.flattenedPages = None
        self.resolvedObjects = {}
        self.xrefIndex = 0
+        self._pageId2Num = None # map page IndirectRef number to Page Number
        if hasattr(stream, 'mode') and 'b' not in stream.mode:
            warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
-        if type(stream) in (string_type, str):
+        if isString(stream):
            fileobj = open(stream, 'rb')
            stream = BytesIO(b_(fileobj.read()))
            fileobj.close()
@ -973,6 +1125,7 @@ class PdfFileReader(object):
        if self.isEncrypted:
            try:
                self._override_encryption = True
+                self.decrypt('')
                return self.trailer["/Root"]["/Pages"]["/Count"]
            except:
                raise utils.PdfReadError("File has not been decrypted")
@ -1160,7 +1313,14 @@ class PdfFileReader(object):

            # get the outline dictionary and named destinations
            if "/Outlines" in catalog:
-                lines = catalog["/Outlines"]
+                try:
+                    lines = catalog["/Outlines"]
+                except utils.PdfReadError:
+                    # this occurs if the /Outlines object reference is incorrect
+                    # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
+                    # so continue to load the file without the Bookmarks
+                    return outlines
+
                if "/First" in lines:
                    node = lines["/First"]
            self._namedDests = self.getNamedDestinations()
@ -1187,6 +1347,49 @@ class PdfFileReader(object):

        return outlines

+    def _getPageNumberByIndirect(self, indirectRef):
+        """Return the page index for an indirect reference (or for a bare int
+        object id), or -1 when no page has that id. The id-to-index table is
+        built from self.pages on first use and kept in self._pageId2Num."""
+        table = self._pageId2Num
+        if table is None:
+            # Built in page order, so a repeated idnum keeps the later index
+            table = dict((pg.indirectRef.idnum, pos)
+                         for pos, pg in enumerate(self.pages))
+            self._pageId2Num = table
+
+        # An int is taken to be the id itself; otherwise read .idnum
+        if isinstance(indirectRef, int):
+            return table.get(indirectRef, -1)
+        return table.get(indirectRef.idnum, -1)
+
+    def getPageNumber(self, page):
+        """
+        Retrieve page number of a given PageObject. The lookup uses the
+        page's indirectRef attribute.
+
+        :param PageObject page: The page to get page number. Should be
+            an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
+        :return: the page number or -1 if page not found
+        :rtype: int
+        """
+        # Delegate to the shared id-based lookup
+        return self._getPageNumberByIndirect(page.indirectRef)
+
+    def getDestinationPageNumber(self, destination):
+        """
+        Retrieve page number of a given Destination object. The lookup uses
+        the destination's page attribute.
+
+        :param Destination destination: The destination to get page number.
+             Should be an instance of
+             :class:`Destination<PyPDF2.pdf.Destination>`
+        :return: the page number or -1 if page not found
+        :rtype: int
+        """
+        # Delegate to the shared id-based lookup
+        return self._getPageNumberByIndirect(destination.page)
+
    def _buildDestination(self, title, array):
        page, typ = array[0:2]
        array = array[2:]
@ -1210,7 +1413,7 @@ class PdfFileReader(object):
        if dest:
            if isinstance(dest, ArrayObject):
                outline = self._buildDestination(title, dest)
-            elif isinstance(dest, Str) and dest in self._namedDests:
+            elif isString(dest) and dest in self._namedDests:
                outline = self._namedDests[dest]
                outline[NameObject("/Title")] = title
            else:
@ -1310,6 +1513,8 @@ class PdfFileReader(object):
        assert idx < objStm['/N']
        streamData = BytesIO(b_(objStm.getData()))
        for i in range(objStm['/N']):
+            readNonWhitespace(streamData)
+            streamData.seek(-1, 1)
            objnum = NumberObject.readFromStream(streamData)
            readNonWhitespace(streamData)
            streamData.seek(-1, 1)
@ -1347,7 +1552,6 @@ class PdfFileReader(object):
        if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
        return NullObject()

-
    def getObject(self, indirectReference):
        debug = False
        if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
@ -1470,7 +1674,7 @@ class PdfFileReader(object):
            startxref = int(line)
        except ValueError:
            # 'startxref' may be on the same line as the location
-            if not line.startswith("startxref"):
+            if not line.startswith(b_("startxref")):
                raise utils.PdfReadError("startxref not found")
            startxref = int(line[9:].strip())
            warnings.warn("startxref on same line as offset")
@ -1580,6 +1784,7 @@ class PdfFileReader(object):
                assert len(entrySizes) >= 3
                if self.strict and len(entrySizes) > 3:
                    raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
+
                def getEntry(i):
                    # Reads the correct number of bytes for each entry. See the
                    # discussion of the W parameter in PDF spec table 17.
@ -1664,8 +1869,7 @@ class PdfFileReader(object):
                if found:
                    continue
                # no xref table found at specified location
-                assert False
-                break
+                raise utils.PdfReadError("Could not find xref table at specified location")
        #if not zero-indexed, verify that the table is correct; change it if necessary
        if self.xrefIndex and not self.strict:
            loc = stream.tell()
@ -1683,7 +1887,6 @@ class PdfFileReader(object):
                    #if not, then either it's just plain wrong, or the non-zero-index is actually correct
            stream.seek(loc, 0) #return to where it was

-
    def _zeroXref(self, generation):
        self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )

@ -1700,8 +1903,13 @@ class PdfFileReader(object):
        if debug: print(">>readNextEndLine")
        line = b_("")
        while True:
+            # Prevent infinite loops in malformed PDFs
+            if stream.tell() == 0:
+                raise utils.PdfReadError("Could not read malformed PDF file")
            x = stream.read(1)
            if debug: print(("  x:", x, "%x"%ord(x)))
+            if stream.tell() < 2:
+                raise utils.PdfReadError("EOL marker not found")
            stream.seek(-2, 1)
            if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
                crlf = False
@ -1713,6 +1921,8 @@ class PdfFileReader(object):
                    if x == b_('\n') or x == b_('\r'): # account for CR+LF
                        stream.seek(-1, 1)
                        crlf = True
+                    if stream.tell() < 2:
+                        raise utils.PdfReadError("EOL marker not found")
                    stream.seek(-2, 1)
                stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
                break
@ -1827,14 +2037,17 @@ def getRectangle(self, name, defaults):
    setRectangle(self, name, retval)
    return retval

+
 def setRectangle(self, name, value):
    if not isinstance(name, NameObject):
        name = NameObject(name)
    self[name] = value

+
 def deleteRectangle(self, name):
    del self[name]

+
 def createRectangleAccessor(name, fallback):
    return \
        property(
@ -1843,6 +2056,7 @@ def createRectangleAccessor(name, fallback):
            lambda self: deleteRectangle(self, name)
            )

+
 class PageObject(DictionaryObject):
    """
    This class represents a single page within a PDF file.  Typically this
@ -2374,6 +2588,7 @@ class PageObject(DictionaryObject):
                for i in operands[0]:
                    if isinstance(i, TextStringObject):
                        text += i
+                text += "\n"
        return text

    mediaBox = createRectangleAccessor("/MediaBox", ())
@ -2412,6 +2627,7 @@ class PageObject(DictionaryObject):
    page's creator.
    """

+
 class ContentStream(DecodedStreamObject):
    def __init__(self, stream, pdf):
        self.pdf = pdf
@ -2437,25 +2653,25 @@ class ContentStream(DecodedStreamObject):
            if peek == b_('') or ord_(peek) == 0:
                break
            stream.seek(-1, 1)
-            if peek.isalpha() or peek == "'" or peek == '"':
+            if peek.isalpha() or peek == b_("'") or peek == b_('"'):
                operator = utils.readUntilRegex(stream,
                        NameObject.delimiterPattern, True)
-                if operator == "BI":
+                if operator == b_("BI"):
                    # begin inline image - a completely different parsing
                    # mechanism is required, of course... thanks buddy...
                    assert operands == []
                    ii = self._readInlineImage(stream)
-                    self.operations.append((ii, "INLINE IMAGE"))
+                    self.operations.append((ii, b_("INLINE IMAGE")))
                else:
                    self.operations.append((operands, operator))
                    operands = []
-            elif peek == '%':
+            elif peek == b_('%'):
                # If we encounter a comment in the content stream, we have to
                # handle it here.  Typically, readObject will handle
                # encountering a comment -- but readObject assumes that
                # following the comment must be the object we're trying to
                # read.  In this case, it could be an operator instead.
-                while peek not in ('\r', '\n'):
+                while peek not in (b_('\r'), b_('\n')):
                    peek = stream.read(1)
            else:
                operands.append(readObject(stream, None))
@ -2467,7 +2683,7 @@ class ContentStream(DecodedStreamObject):
        while True:
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
-            if tok == "I":
+            if tok == b_("I"):
                # "ID" - begin of image data
                break
            key = readObject(stream, self.pdf)
@ -2477,28 +2693,32 @@ class ContentStream(DecodedStreamObject):
            settings[key] = value
        # left at beginning of ID
        tmp = stream.read(3)
-        assert tmp[:2] == "ID"
-        data = ""
+        assert tmp[:2] == b_("ID")
+        data = b_("")
        while True:
+            # Read the inline image, while checking for EI (End Image) operator.
            tok = stream.read(1)
-            if tok == "E":
+            if tok == b_("E"):
                # Check for End Image
-                next1 = stream.read(1)
-                if next1 == "I":
-                    next2 = readNonWhitespace(stream)
-                    if next2 == 'Q':
+                tok2 = stream.read(1)
+                if tok2 == b_("I"):
+                    # Sometimes that data will contain EI, so check for the Q operator.
+                    tok3 = stream.read(1)
+                    info = tok + tok2
+                    while tok3 in utils.WHITESPACES:
+                        info += tok3
+                        tok3 = stream.read(1)
+                    if tok3 == b_("Q"):
                        stream.seek(-1, 1)
                        break
                    else:
-                        stream.seek(-2, 1)
-                        data += tok
+                        stream.seek(-1,1)
+                        data += info
                else:
                    stream.seek(-1, 1)
                    data += tok
            else:
                data += tok
-        x = readNonWhitespace(stream)
-        stream.seek(-1, 1)
        return {"settings": settings, "data": data}

    def _getData(self):
@ -2525,6 +2745,7 @@ class ContentStream(DecodedStreamObject):

    _data = property(_getData, _setData)

+
 class DocumentInformation(DictionaryObject):
    """
    A class representing the basic document metadata provided in a PDF File.
@ -2588,6 +2809,7 @@ class DocumentInformation(DictionaryObject):
    producer_raw = property(lambda self: self.get("/Producer"))
    """The "raw" version of producer; can return a ``ByteStringObject``."""

+
 def convertToInt(d, size):
    if size > 8:
        raise utils.PdfReadError("invalid size in convertToInt")
@ -2600,6 +2822,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
        b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
        b_('\xa9\xfe\x64\x53\x69\x7a')

+
 # Implementation of algorithm 3.2 of the PDF standard security handler,
 # section 3.5.2 of the PDF 1.6 reference.
 def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
@ -2643,6 +2866,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    # entry.
    return md5_hash[:keylen]

+
 # Implementation of algorithm 3.3 of the PDF standard security handler,
 # section 3.5.2 of the PDF 1.6 reference.
 def _alg33(owner_pwd, user_pwd, rev, keylen):
@ -2670,6 +2894,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen):
    # the /O entry in the encryption dictionary.
    return val

+
 # Steps 1-4 of algorithm 3.3
 def _alg33_1(password, rev, keylen):
    # 1. Pad or truncate the owner password string as described in step 1 of
@ -2692,6 +2917,7 @@ def _alg33_1(password, rev, keylen):
    key = md5_hash[:keylen]
    return key

+
 # Implementation of algorithm 3.4 of the PDF standard security handler,
 # section 3.5.2 of the PDF 1.6 reference.
 def _alg34(password, owner_entry, p_entry, id1_entry):
@ -2706,6 +2932,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry):
    # encryption dictionary.
    return U, key

+
 # Implementation of algorithm 3.4 of the PDF standard security handler,
 # section 3.5.2 of the PDF 1.6 reference.
 def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
--- a/Shared/lib/python3.4/site-packages/PyPDF2/utils.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/utils.py
@ -33,25 +33,35 @@ __author_email__ = "biziqe@mathieu.fenniak.net"


 import sys
-# "Str" maintains compatibility with Python 2.x.
-# The next line is obfuscated like this so 2to3 won't change it.
+
 try:
    import __builtin__ as builtins
 except ImportError:  # Py3
    import builtins


-if sys.version_info[0] < 3:
-    string_type = unicode
-    bytes_type = str
-    int_types = (int, long)
-else:
-    string_type = str
-    bytes_type = bytes
-    int_types = (int,)
+xrange_fn = getattr(builtins, "xrange", range)
+_basestring = getattr(builtins, "basestring", str)

-Xrange = getattr(builtins, "xrange", range)
-Str = getattr(builtins, "basestring", str)
+bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
+string_type = getattr(builtins, "unicode", str)
+int_types = (int, long) if sys.version_info[0] < 3 else (int,)
+
+
+# Make basic type tests more consistent
+def isString(s):
+    """Return True if s is a basestring (Python 2) or str (Python 3) instance."""
+    return isinstance(s, _basestring)
+
+
+def isInt(n):
+    """Return True if n is an instance of int_types: int, plus long on Python 2."""
+    return isinstance(n, int_types)
+
+
+def isBytes(b):
+    """Return True if b is an instance of bytes_type, i.e. the built-in bytes type."""
+    return isinstance(b, bytes_type)


 #custom implementation of warnings.formatwarning
@ -59,6 +69,7 @@ def formatWarning(message, category, filename, lineno, line=None):
    file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
    return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)

+
 def readUntilWhitespace(stream, maxchars=None):
    """
    Reads non-whitespace characters and returns them.
@ -74,6 +85,7 @@ def readUntilWhitespace(stream, maxchars=None):
            break
    return txt

+
 def readNonWhitespace(stream):
    """
    Finds and reads the next non-whitespace character (ignores whitespace).
@ -83,6 +95,7 @@ def readNonWhitespace(stream):
        tok = stream.read(1)
    return tok

+
 def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean if more than
@ -95,6 +108,7 @@ def skipOverWhitespace(stream):
        cnt+=1
    return (cnt > 1)

+
 def skipOverComment(stream):
    tok = stream.read(1)
    stream.seek(-1, 1)
@ -102,6 +116,7 @@ def skipOverComment(stream):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)

+
 def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matched (ignore the match)
@ -125,6 +140,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
        name += tok
    return name

+
 class ConvertFunctionsToVirtualList(object):
    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
@ -135,10 +151,10 @@ class ConvertFunctionsToVirtualList(object):

    def __getitem__(self, index):
        if isinstance(index, slice):
-            indices = Xrange(*index.indices(len(self)))
+            indices = xrange_fn(*index.indices(len(self)))
            cls = type(self)
            return cls(indices.__len__, lambda idx: self[indices[idx]])
-        if not isinstance(index, int_types):
+        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        len_self = len(self)
        if index < 0:
@ -148,6 +164,7 @@ class ConvertFunctionsToVirtualList(object):
            raise IndexError("sequence index out of range")
        return self.getFunction(index)

+
 def RC4_encrypt(key, plaintext):
    S = [i for i in range(256)]
    j = 0
@ -164,12 +181,14 @@ def RC4_encrypt(key, plaintext):
        retval += b_(chr(ord_(plaintext[x]) ^ t))
    return retval

+
 def matrixMultiply(a, b):
    return [[sum([float(i)*float(j)
                  for i, j in zip(row, col)]
                ) for col in zip(*b)]
            for row in a]

+
 def markLocation(stream):
    """Creates text file showing current location in context."""
    # Mainly for debugging
@ -182,18 +201,23 @@ def markLocation(stream):
    outputDoc.close()
    stream.seek(-RADIUS, 1)

+
 class PyPdfError(Exception):
    pass

+
 class PdfReadError(PyPdfError):
    pass

+
 class PageSizeNotDefinedError(PyPdfError):
    pass

+
 class PdfReadWarning(UserWarning):
    pass

+
 class PdfStreamError(PdfReadError):
    pass

@ -203,6 +227,7 @@ if sys.version_info[0] < 3:
        return s
 else:
    B_CACHE = {}
+
    def b_(s):
        bc = B_CACHE
        if s in bc:
@ -214,6 +239,8 @@ else:
            if len(s) < 2:
                bc[s] = r
            return r
+
+
 def u_(s):
    if sys.version_info[0] < 3:
        return unicode(s, 'unicode_escape')
@ -230,24 +257,28 @@ def str_(b):
        else:
            return b

+
 def ord_(b):
    if sys.version_info[0] < 3 or type(b) == str:
        return ord(b)
    else:
        return b

+
 def chr_(c):
    if sys.version_info[0] < 3:
        return c
    else:
        return chr(c)

+
 def barray(b):
    if sys.version_info[0] < 3:
        return b
    else:
        return bytearray(b)

+
 def hexencode(b):
    if sys.version_info[0] < 3:
        return b.encode('hex')
@ -256,6 +287,7 @@ def hexencode(b):
        coder = codecs.getencoder('hex_codec')
        return coder(b)[0]

+
 def hexStr(num):
    return hex(num).replace('L', '')

--- a/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py
@ -50,6 +50,7 @@ iso8601 = re.compile("""
        )?
        """, re.VERBOSE)

+
 class XmpInformation(PdfObject):
    """
    An object that represents Adobe XMP metadata.
@ -355,5 +356,3 @@ class XmpInformation(PdfObject):
    :return: a dictionary of key/value items for custom metadata properties.
    :rtype: dict
    """
-
-