diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/PKG-INFO b/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/PKG-INFO deleted file mode 100644 index 994e807..0000000 --- a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/PKG-INFO +++ /dev/null @@ -1,32 +0,0 @@ -Metadata-Version: 1.1 -Name: PyPDF2 -Version: 1.23 -Summary: PDF toolkit -Home-page: http://mstamy2.github.com/PyPDF2 -Author: Phaseit, Inc. -Author-email: PyPDF2@phaseit.net -License: UNKNOWN -Description: - A Pure-Python library built as a PDF toolkit. It is capable of: - - - extracting document information (title, author, ...) - - splitting documents page by page - - merging documents page by page - - cropping pages - - merging multiple pages into a single page - - encrypting and decrypting PDF files - - and more! - - By being Pure-Python, it should run on any Python platform without any - dependencies on external libraries. It can also work entirely on StringIO - objects rather than file streams, allowing for PDF manipulation in memory. - It is therefore a useful tool for websites that manage or manipulate PDFs. - -Platform: UNKNOWN -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: BSD License -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 3 -Classifier: Operating System :: OS Independent -Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/SOURCES.txt b/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/SOURCES.txt deleted file mode 100644 index 0c7a469..0000000 --- a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/SOURCES.txt +++ /dev/null @@ -1,15 +0,0 @@ -CHANGELOG -MANIFEST.in -PyPDF2/__init__.py -PyPDF2/_version.py -PyPDF2/filters.py -PyPDF2/generic.py -PyPDF2/merger.py -PyPDF2/pagerange.py -PyPDF2/pdf.py -PyPDF2/utils.py -PyPDF2/xmp.py -PyPDF2.egg-info/PKG-INFO -PyPDF2.egg-info/SOURCES.txt -PyPDF2.egg-info/dependency_links.txt -PyPDF2.egg-info/top_level.txt \ No newline at end of file diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/dependency_links.txt b/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/installed-files.txt b/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/installed-files.txt deleted file mode 100644 index d3945c1..0000000 --- a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/installed-files.txt +++ /dev/null @@ -1,23 +0,0 @@ -../PyPDF2/filters.py -../PyPDF2/generic.py -../PyPDF2/merger.py -../PyPDF2/pagerange.py -../PyPDF2/pdf.py -../PyPDF2/utils.py -../PyPDF2/xmp.py -../PyPDF2/_version.py -../PyPDF2/__init__.py -../PyPDF2/__pycache__/filters.cpython-34.pyc -../PyPDF2/__pycache__/generic.cpython-34.pyc -../PyPDF2/__pycache__/merger.cpython-34.pyc -../PyPDF2/__pycache__/pagerange.cpython-34.pyc -../PyPDF2/__pycache__/pdf.cpython-34.pyc -../PyPDF2/__pycache__/utils.cpython-34.pyc -../PyPDF2/__pycache__/xmp.cpython-34.pyc -../PyPDF2/__pycache__/_version.cpython-34.pyc -../PyPDF2/__pycache__/__init__.cpython-34.pyc -./ -top_level.txt -dependency_links.txt -PKG-INFO -SOURCES.txt diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/DESCRIPTION.rst b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/DESCRIPTION.rst new file mode 100644 index 0000000..dc9292c --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/DESCRIPTION.rst @@ -0,0 +1,17 @@ + +A Pure-Python library built as a PDF toolkit. It is capable of: + +- extracting document information (title, author, ...) +- splitting documents page by page +- merging documents page by page +- cropping pages +- merging multiple pages into a single page +- encrypting and decrypting PDF files +- and more! + +By being Pure-Python, it should run on any Python platform without any +dependencies on external libraries. It can also work entirely on StringIO +objects rather than file streams, allowing for PDF manipulation in memory. +It is therefore a useful tool for websites that manage or manipulate PDFs. + + diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/METADATA b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/METADATA new file mode 100644 index 0000000..34688a7 --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/METADATA @@ -0,0 +1,34 @@ +Metadata-Version: 2.0 +Name: PyPDF2 +Version: 1.25.1 +Summary: PDF toolkit +Home-page: http://mstamy2.github.com/PyPDF2 +Author: Phaseit, Inc. +Author-email: PyPDF2@phaseit.net +License: UNKNOWN +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Operating System :: OS Independent +Classifier: Topic :: Software Development :: Libraries :: Python Modules + + +A Pure-Python library built as a PDF toolkit. It is capable of: + +- extracting document information (title, author, ...) +- splitting documents page by page +- merging documents page by page +- cropping pages +- merging multiple pages into a single page +- encrypting and decrypting PDF files +- and more! + +By being Pure-Python, it should run on any Python platform without any +dependencies on external libraries. It can also work entirely on StringIO +objects rather than file streams, allowing for PDF manipulation in memory. +It is therefore a useful tool for websites that manage or manipulate PDFs. + + diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/RECORD b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/RECORD new file mode 100644 index 0000000..b16eebc --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/RECORD @@ -0,0 +1,25 @@ +PyPDF2/__init__.py,sha256=ugkP-3fEFZZ2-54PmYpjJ5CISEPD5W8TikZlloOJZ5M,210 +PyPDF2/_version.py,sha256=ufPT1c1QzU2MdIAGUZ89UoQfl6t3IJdOjhMyLVhsDmQ,23 +PyPDF2/filters.py,sha256=U4KQ7fJX129ePxoff-6-009e9kCWlj8_d2ipnm5QDG4,13167 +PyPDF2/generic.py,sha256=bJ3e3PpqJCvTHrQ3IH3VEXMh1RWVqiCh9T1IcmkBuAo,45129 +PyPDF2/merger.py,sha256=2Cz4QaB8R-Zm3V5P2rI-QYdqMZlN4geaAtNfrPbcTM4,21387 +PyPDF2/pagerange.py,sha256=AEMerbVjzXE55sJ2EYZzBgH1Xt4NiUsHaiycoNaW8Ys,5534 +PyPDF2/pdf.py,sha256=ceuZWSZIupSbzEzw6QrbNmN9D8PrdM6dh8zHSB9Rg2o,124907 +PyPDF2/utils.py,sha256=-ZQky5qa4gsO0zprA8V_E5sTNRBSa_ungvxvxjdHr64,7833 +PyPDF2/xmp.py,sha256=vdjDUAMCqb7-AhkuNaqCanviPHMpuJ-5adY8Kxe5jUc,13639 +PyPDF2-1.25.1.dist-info/DESCRIPTION.rst,sha256=mCiWyCHYtsbQ22O_f2FbbD8CjW1GMfwvbn67J_THZ5M,600 +PyPDF2-1.25.1.dist-info/METADATA,sha256=lGFpbQOrG5_oOYPi4GlzoQT4Lyj3eCvNEHIomSf4JsU,1174 +PyPDF2-1.25.1.dist-info/RECORD,, +PyPDF2-1.25.1.dist-info/WHEEL,sha256=bfpjj1zBtYtglW1hWtnRCmhEcEV3TH8magB_ZQeGgSg,93 +PyPDF2-1.25.1.dist-info/metadata.json,sha256=aVLfNzdnpxj8hyl12sDq-3IgfGH7t0g5gS2y6LPYtYE,692 +PyPDF2-1.25.1.dist-info/top_level.txt,sha256=BERWrwqdvKXaVKhpnMbtO6b11qPA-mBt2r9a0VPF-Ow,7 +/srv/openmedialibrary/platform/Shared/home/.local/lib/python3.5/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +PyPDF2/__pycache__/xmp.cpython-35.pyc,, +PyPDF2/__pycache__/utils.cpython-35.pyc,, +PyPDF2/__pycache__/pdf.cpython-35.pyc,, +PyPDF2/__pycache__/merger.cpython-35.pyc,, +PyPDF2/__pycache__/__init__.cpython-35.pyc,, +PyPDF2/__pycache__/generic.cpython-35.pyc,, +PyPDF2/__pycache__/filters.cpython-35.pyc,, +PyPDF2/__pycache__/pagerange.cpython-35.pyc,, +PyPDF2/__pycache__/_version.cpython-35.pyc,, diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/WHEEL b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/WHEEL new file mode 100644 index 0000000..21a0bff --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.26.0) +Root-Is-Purelib: true +Tag: cp35-none-any + diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/metadata.json b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/metadata.json new file mode 100644 index 0000000..672ae35 --- /dev/null +++ b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/metadata.json @@ -0,0 +1 @@ +{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", "Operating System :: OS Independent", "Topic :: Software Development :: Libraries :: Python Modules"], "extensions": {"python.details": {"contacts": [{"email": "PyPDF2@phaseit.net", "name": "Phaseit, Inc.", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://mstamy2.github.com/PyPDF2"}}}, "generator": "bdist_wheel (0.26.0)", "metadata_version": "2.0", "name": "PyPDF2", "summary": "PDF toolkit", "version": "1.25.1"} \ No newline at end of file diff --git a/Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/top_level.txt b/Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/top_level.txt similarity index 100% rename from Shared/lib/python3.4/site-packages/PyPDF2-1.23.egg-info/top_level.txt rename to Shared/lib/python3.4/site-packages/PyPDF2-1.25.1.dist-info/top_level.txt diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/_version.py b/Shared/lib/python3.4/site-packages/PyPDF2/_version.py index 27c9a69..760870c 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/_version.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/_version.py @@ -1,2 +1 @@ -__version__ = '1.23' - +__version__ = '1.25.1' diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/filters.py b/Shared/lib/python3.4/site-packages/PyPDF2/filters.py index 45477c6..3717fd4 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/filters.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/filters.py @@ -40,28 +40,35 @@ if version_info < ( 3, 0 ): from cStringIO import StringIO else: from io import StringIO + import struct try: import zlib + def decompress(data): return zlib.decompress(data) + def compress(data): return zlib.compress(data) + except ImportError: # Unable to import zlib. Attempt to use the System.IO.Compression # library from the .NET framework. (IronPython only) import System from System import IO, Collections, Array + def _string_to_bytearr(buf): retval = Array.CreateInstance(System.Byte, len(buf)) for i in range(len(buf)): retval[i] = ord(buf[i]) return retval + def _bytearr_to_string(bytes): retval = "" for i in range(bytes.Length): retval += chr(bytes[i]) return retval + def _read_bytes(stream): ms = IO.MemoryStream() buf = Array.CreateInstance(System.Byte, 2048) @@ -74,6 +81,7 @@ except ImportError: retval = ms.ToArray() ms.Close() return retval + def decompress(data): bytes = _string_to_bytearr(data) ms = IO.MemoryStream() @@ -84,6 +92,7 @@ except ImportError: retval = _bytearr_to_string(bytes) gz.Close() return retval + def compress(data): bytes = _string_to_bytearr(data) ms = IO.MemoryStream() @@ -106,7 +115,7 @@ class FlateDecode(object): predictor = decodeParms.get("/Predictor", 1) except AttributeError: pass # usually an array with a null object was read - + # predictor 1 == no predictor if predictor != 1: columns = decodeParms["/Columns"] @@ -144,6 +153,7 @@ class FlateDecode(object): return compress(data) encode = staticmethod(encode) + class ASCIIHexDecode(object): def decode(data, decodeParms=None): retval = "" @@ -165,6 +175,7 @@ class ASCIIHexDecode(object): return retval decode = staticmethod(decode) + class LZWDecode(object): """Taken from: http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm @@ -184,7 +195,6 @@ class LZWDecode(object): def resetDict(self): self.dictlen=258 self.bitspercode=9 - def nextCode(self): fillbits=self.bitspercode @@ -196,8 +206,8 @@ class LZWDecode(object): bitsfromhere=8-self.bitpos if bitsfromhere>fillbits: bitsfromhere=fillbits - value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) & - (0xff >> (8-bitsfromhere))) << + value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) & + (0xff >> (8-bitsfromhere))) << (fillbits-bitsfromhere)) fillbits -= bitsfromhere self.bitpos += bitsfromhere @@ -235,70 +245,93 @@ class LZWDecode(object): baos+=p self.dict[self.dictlen] = p; self.dictlen+=1 - if (self.dictlen >= (1 << self.bitspercode) - 1 and + if (self.dictlen >= (1 << self.bitspercode) - 1 and self.bitspercode < 12): self.bitspercode+=1 return baos - - @staticmethod def decode(data,decodeParams=None): return LZWDecode.decoder(data).decode() + class ASCII85Decode(object): def decode(data, decodeParms=None): - retval = "" - group = [] - x = 0 - hitEod = False - # remove all whitespace from data - data = [y for y in data if not (y in ' \n\r\t')] - while not hitEod: - c = data[x] - if len(retval) == 0 and c == "<" and data[x+1] == "~": - x += 2 - continue - #elif c.isspace(): - # x += 1 - # continue - elif c == 'z': - assert len(group) == 0 - retval += '\x00\x00\x00\x00' - x += 1 - continue - elif c == "~" and data[x+1] == ">": - if len(group) != 0: - # cannot have a final group of just 1 char - assert len(group) > 1 - cnt = len(group) - 1 - group += [ 85, 85, 85 ] - hitEod = cnt + if version_info < ( 3, 0 ): + retval = "" + group = [] + x = 0 + hitEod = False + # remove all whitespace from data + data = [y for y in data if not (y in ' \n\r\t')] + while not hitEod: + c = data[x] + if len(retval) == 0 and c == "<" and data[x+1] == "~": + x += 2 + continue + #elif c.isspace(): + # x += 1 + # continue + elif c == 'z': + assert len(group) == 0 + retval += '\x00\x00\x00\x00' + x += 1 + continue + elif c == "~" and data[x+1] == ">": + if len(group) != 0: + # cannot have a final group of just 1 char + assert len(group) > 1 + cnt = len(group) - 1 + group += [ 85, 85, 85 ] + hitEod = cnt + else: + break else: + c = ord(c) - 33 + assert c >= 0 and c < 85 + group += [ c ] + if len(group) >= 5: + b = group[0] * (85**4) + \ + group[1] * (85**3) + \ + group[2] * (85**2) + \ + group[3] * 85 + \ + group[4] + assert b < (2**32 - 1) + c4 = chr((b >> 0) % 256) + c3 = chr((b >> 8) % 256) + c2 = chr((b >> 16) % 256) + c1 = chr(b >> 24) + retval += (c1 + c2 + c3 + c4) + if hitEod: + retval = retval[:-4+hitEod] + group = [] + x += 1 + return retval + else: + if isinstance(data, str): + data = data.encode('ascii') + n = b = 0 + out = bytearray() + for c in data: + if ord('!') <= c and c <= ord('u'): + n += 1 + b = b*85+(c-33) + if n == 5: + out += struct.pack(b'>L',b) + n = b = 0 + elif c == ord('z'): + assert n == 0 + out += b'\0\0\0\0' + elif c == ord('~'): + if n: + for _ in range(5-n): + b = b*85+84 + out += struct.pack(b'>L',b)[:n-1] break - else: - c = ord(c) - 33 - assert c >= 0 and c < 85 - group += [ c ] - if len(group) >= 5: - b = group[0] * (85**4) + \ - group[1] * (85**3) + \ - group[2] * (85**2) + \ - group[3] * 85 + \ - group[4] - assert b < (2**32 - 1) - c4 = chr((b >> 0) % 256) - c3 = chr((b >> 8) % 256) - c2 = chr((b >> 16) % 256) - c1 = chr(b >> 24) - retval += (c1 + c2 + c3 + c4) - if hitEod: - retval = retval[:-4+hitEod] - group = [] - x += 1 - return retval + return bytes(out) decode = staticmethod(decode) + def decodeStreamData(stream): from .generic import NameObject filters = stream.get("/Filter", ()) @@ -306,22 +339,24 @@ def decodeStreamData(stream): # we have a single filter instance filters = (filters,) data = stream._data - for filterType in filters: - if filterType == "/FlateDecode": - data = FlateDecode.decode(data, stream.get("/DecodeParms")) - elif filterType == "/ASCIIHexDecode": - data = ASCIIHexDecode.decode(data) - elif filterType == "/LZWDecode": - data = LZWDecode.decode(data, stream.get("/DecodeParms")) - elif filterType == "/ASCII85Decode": - data = ASCII85Decode.decode(data) - elif filterType == "/Crypt": - decodeParams = stream.get("/DecodeParams", {}) - if "/Name" not in decodeParams and "/Type" not in decodeParams: - pass + # If there is not data to decode we should not try to decode the data. + if data: + for filterType in filters: + if filterType == "/FlateDecode" or filterType == "/Fl": + data = FlateDecode.decode(data, stream.get("/DecodeParms")) + elif filterType == "/ASCIIHexDecode" or filterType == "/AHx": + data = ASCIIHexDecode.decode(data) + elif filterType == "/LZWDecode" or filterType == "/LZW": + data = LZWDecode.decode(data, stream.get("/DecodeParms")) + elif filterType == "/ASCII85Decode" or filterType == "/A85": + data = ASCII85Decode.decode(data) + elif filterType == "/Crypt": + decodeParams = stream.get("/DecodeParams", {}) + if "/Name" not in decodeParams and "/Type" not in decodeParams: + pass + else: + raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet") else: - raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet") - else: - # unsupported filter - raise NotImplementedError("unsupported filter %s" % filterType) + # unsupported filter + raise NotImplementedError("unsupported filter %s" % filterType) return data diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/generic.py b/Shared/lib/python3.4/site-packages/PyPDF2/generic.py index 0b66c5e..df1e028 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/generic.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/generic.py @@ -43,11 +43,14 @@ from . import filters from . import utils import decimal import codecs +import sys #import debugging ObjectPrefix = b_('/<[tf(n%') NumberSigns = b_('+-') IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]")) + + def readObject(stream, pdf): tok = stream.read(1) stream.seek(-1, 1) # reset to start @@ -94,6 +97,7 @@ def readObject(stream, pdf): else: return NumberObject.readFromStream(stream) + class PdfObject(object): def getObject(self): """Resolves indirect references.""" @@ -225,6 +229,7 @@ class FloatObject(decimal.Decimal, PdfObject): return decimal.Decimal.__new__(cls, utils.str_(value), context) except: return decimal.Decimal.__new__(cls, str(value)) + def __repr__(self): if self == self.to_integral(): return str(self.quantize(decimal.Decimal(1))) @@ -244,7 +249,11 @@ class NumberObject(int, PdfObject): ByteDot = b_(".") def __new__(cls, value): - return int.__new__(cls, value) + val = int(value) + try: + return int.__new__(cls, val) + except OverflowError: + return int.__new__(cls, 0) def as_numeric(self): return int(b_(repr(self))) @@ -253,16 +262,7 @@ class NumberObject(int, PdfObject): stream.write(b_(repr(self))) def readFromStream(stream): - num = b_("") - while True: - tok = stream.read(16) - m = NumberObject.NumberPattern.search(tok) - if m is not None: - stream.seek(m.start() - len(tok), 1) - num += tok[:m.start()] - break - - num += tok + num = utils.readUntilRegex(stream, NumberObject.NumberPattern) if num.find(NumberObject.ByteDot) != -1: return FloatObject(num) else: @@ -345,13 +345,18 @@ def readStringFromStream(stream): tok = b_("\b") elif tok == b_("f"): tok = b_("\f") + elif tok == b_("c"): + tok = b_("\c") elif tok == b_("("): tok = b_("(") elif tok == b_(")"): tok = b_(")") + elif tok == b_("/"): + tok = b_("/") elif tok == b_("\\"): tok = b_("\\") - elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")): + elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), + b_("]"), b_("#"), b_("_"), b_("&"), b_('$')): # odd/unnessecary escape sequences we have encountered tok = b_(tok) elif tok.isdigit(): @@ -378,7 +383,7 @@ def readStringFromStream(stream): # line break was escaped: tok = b_('') else: - raise utils.PdfReadError("Unexpected escaped string") + raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok) txt += tok return createStringObject(txt) @@ -456,7 +461,7 @@ class TextStringObject(utils.string_type, PdfObject): class NameObject(str, PdfObject): - delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]")) + delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]")) surfix = b_("/") def writeToStream(self, stream, encryption_key): @@ -468,11 +473,12 @@ class NameObject(str, PdfObject): name = stream.read(1) if name != NameObject.surfix: raise utils.PdfReadError("name read error") - name += utils.readUntilRegex(stream, NameObject.delimiterPattern) + name += utils.readUntilRegex(stream, NameObject.delimiterPattern, + ignore_eof=True) if debug: print(name) try: return NameObject(name.decode('utf-8')) - except UnicodeDecodeError as e: + except (UnicodeEncodeError, UnicodeDecodeError) as e: # Name objects should represent irregular characters # with a '#' followed by the symbol's hex number if not pdf.strict: @@ -630,6 +636,7 @@ class DictionaryObject(dict, PdfObject): return retval readFromStream = staticmethod(readFromStream) + class TreeObject(DictionaryObject): def __init__(self): DictionaryObject.__init__(self) @@ -726,7 +733,6 @@ class TreeObject(DictionaryObject): found = True break - prevRef = curRef prev = cur if NameObject('/Next') in cur: @@ -938,6 +944,7 @@ class RectangleObject(ArrayObject): in (x,y) form. """ + class Field(TreeObject): """ A class representing a field dictionary. This class is accessed through @@ -1009,6 +1016,7 @@ class Field(TreeObject): See Section 8.5.2 of the PDF 1.7 reference. """ + class Destination(TreeObject): """ A class representing a destination within a PDF file. @@ -1157,6 +1165,7 @@ def encode_pdfdocencoding(unicode_string): "does not exist in translation table") return retval + def decode_pdfdocencoding(byte_array): retval = u_('') for b in byte_array: @@ -1211,4 +1220,3 @@ for i in range(256): continue assert char not in _pdfDocEncoding_rev _pdfDocEncoding_rev[char] = i - diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/merger.py b/Shared/lib/python3.4/site-packages/PyPDF2/merger.py index c8e6a62..27702ad 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/merger.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/merger.py @@ -28,7 +28,7 @@ # POSSIBILITY OF SUCH DAMAGE. from .generic import * -from .utils import string_type +from .utils import isString, str_ from .pdf import PdfFileReader, PdfFileWriter from .pagerange import PageRange from sys import version_info @@ -40,6 +40,7 @@ else: from io import FileIO as file StreamIO = BytesIO + class _MergedPage(object): """ _MergedPage is used internally by PdfFileMerger to collect necessary @@ -50,13 +51,14 @@ class _MergedPage(object): self.pagedata = pagedata self.out_pagedata = None self.id = id - + + class PdfFileMerger(object): """ Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs into a single PDF. It can concatenate, slice, insert, or any combination of the above. - + See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. @@ -64,7 +66,7 @@ class PdfFileMerger(object): problems and also causes some correctable problems to be fatal. Defaults to ``True``. """ - + def __init__(self, strict=True): self.inputs = [] self.pages = [] @@ -73,7 +75,7 @@ class PdfFileMerger(object): self.named_dests = [] self.id_count = 0 self.strict = strict - + def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True): """ Merges the pages from the given file into the output file at the @@ -85,29 +87,30 @@ class PdfFileMerger(object): :param fileobj: A File Object or an object that supports the standard read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - + :param str bookmark: Optionally, you may specify a bookmark to be applied at the beginning of the included file by supplying the text of the bookmark. :param pages: can be a :ref:`Page Range ` or a ``(start, stop[, step])`` tuple to merge only the specified range of pages from the source document into the output document. - + :param bool import_bookmarks: You may prevent the source document's bookmarks from being imported by specifying this as ``False``. """ - + # This parameter is passed to self.inputs.append and means # that the stream used was created in this method. my_file = False - + # If the fileobj parameter is a string, assume it is a path # and create a file object at that location. If it is a file, - # copy the file's contents into a BytesIO (or StreamIO) stream object; if - # it is a PdfFileReader, copy that reader's stream into a + # copy the file's contents into a BytesIO (or StreamIO) stream object; if + # it is a PdfFileReader, copy that reader's stream into a # BytesIO (or StreamIO) stream. # If fileobj is none of the above types, it is not modified - if type(fileobj) == string_type: + decryption_key = None + if isString(fileobj): fileobj = file(fileobj, 'rb') my_file = True elif isinstance(fileobj, file): @@ -116,17 +119,21 @@ class PdfFileMerger(object): fileobj = StreamIO(filecontent) my_file = True elif isinstance(fileobj, PdfFileReader): - orig_tell = fileobj.stream.tell() + orig_tell = fileobj.stream.tell() fileobj.stream.seek(0) filecontent = StreamIO(fileobj.stream.read()) fileobj.stream.seek(orig_tell) # reset the stream to its original location fileobj = filecontent + if hasattr(fileobj, '_decryption_key'): + decryption_key = fileobj._decryption_key my_file = True - + # Create a new PdfFileReader instance using the stream # (either file or BytesIO or StringIO) created above pdfr = PdfFileReader(fileobj, strict=self.strict) - + if decryption_key is not None: + pdfr._decryption_key = decryption_key + # Find the range of pages to merge. if pages == None: pages = (0, pdfr.getNumPages()) @@ -134,47 +141,45 @@ class PdfFileMerger(object): pages = pages.indices(pdfr.getNumPages()) elif not isinstance(pages, tuple): raise TypeError('"pages" must be a tuple of (start, stop[, step])') - + srcpages = [] if bookmark: bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit')) - + outline = [] if import_bookmarks: outline = pdfr.getOutlines() outline = self._trim_outline(pdfr, outline, pages) - + if bookmark: self.bookmarks += [bookmark, outline] else: self.bookmarks += outline - + dests = pdfr.namedDestinations dests = self._trim_dests(pdfr, dests, pages) self.named_dests += dests - + # Gather all the pages that are going to be merged for i in range(*pages): pg = pdfr.getPage(i) - + id = self.id_count self.id_count += 1 - + mp = _MergedPage(pg, pdfr, id) - + srcpages.append(mp) self._associate_dests_to_pages(srcpages) self._associate_bookmarks_to_pages(srcpages) - - + # Slice to insert the pages at the specified position self.pages[position:position] = srcpages - + # Keep track of our input files so we can close them later self.inputs.append((fileobj, pdfr, my_file)) - - + def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True): """ Identical to the :meth:`merge()` method, but assumes you want to concatenate @@ -183,7 +188,7 @@ class PdfFileMerger(object): :param fileobj: A File Object or an object that supports the standard read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - + :param str bookmark: Optionally, you may specify a bookmark to be applied at the beginning of the included file by supplying the text of the bookmark. @@ -194,10 +199,9 @@ class PdfFileMerger(object): :param bool import_bookmarks: You may prevent the source document's bookmarks from being imported by specifying this as ``False``. """ - + self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks) - - + def write(self, fileobj): """ Writes all data that has been merged to the given output file. @@ -206,11 +210,10 @@ class PdfFileMerger(object): file-like object. """ my_file = False - if type(fileobj) in (str, str): + if isString(fileobj): fileobj = file(fileobj, 'wb') my_file = True - # Add pages to the PdfFileWriter # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13 for page in self.pages: @@ -222,15 +225,13 @@ class PdfFileMerger(object): # Once all pages are added, create bookmarks to point at those pages self._write_dests() self._write_bookmarks() - - # Write the output to the file + + # Write the output to the file self.output.write(fileobj) - + if my_file: fileobj.close() - - def close(self): """ Shuts all file descriptors (input and output) and clears all memory @@ -240,7 +241,7 @@ class PdfFileMerger(object): for fo, pdfr, mine in self.inputs: if mine: fo.close() - + self.inputs = [] self.output = None @@ -253,7 +254,7 @@ class PdfFileMerger(object): Example: ``{u'/Title': u'My title'}`` """ self.output.addMetadata(infos) - + def setPageLayout(self, layout): """ Set the page layout @@ -289,7 +290,7 @@ class PdfFileMerger(object): def _trim_dests(self, pdf, dests, pages): """ - Removes any named destinations that are not a part of the specified + Removes any named destinations that are not a part of the specified page set. """ new_dests = [] @@ -298,14 +299,14 @@ class PdfFileMerger(object): for j in range(*pages): if pdf.getPage(j).getObject() == o['/Page'].getObject(): o[NameObject('/Page')] = o['/Page'].getObject() - assert str(k) == str(o['/Title']) + assert str_(k) == str_(o['/Title']) new_dests.append(o) break return new_dests - + def _trim_outline(self, pdf, outline, pages): """ - Removes any outline/bookmark entries that are not a part of the + Removes any outline/bookmark entries that are not a part of the specified page set. """ new_outline = [] @@ -326,10 +327,10 @@ class PdfFileMerger(object): prev_header_added = True break return new_outline - + def _write_dests(self): dests = self.named_dests - + for v in dests: pageno = None pdf = None @@ -342,19 +343,18 @@ class PdfFileMerger(object): break if pageno != None: self.output.addNamedDestinationObject(v) - + def _write_bookmarks(self, bookmarks=None, parent=None): - + if bookmarks == None: bookmarks = self.bookmarks - last_added = None for b in bookmarks: if isinstance(b, list): self._write_bookmarks(b, last_added) continue - + pageno = None pdf = None if '/Page' in b: @@ -410,31 +410,31 @@ class PdfFileMerger(object): del b['/Left'], b['/Right'], b['/Bottom'], b['/Top'] b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)}) - + pageno = i pdf = p.src break if pageno != None: del b['/Page'], b['/Type'] - last_added = self.output.addBookmarkDict(b, parent) + last_added = self.output.addBookmarkDict(b, parent) def _associate_dests_to_pages(self, pages): for nd in self.named_dests: pageno = None np = nd['/Page'] - + if isinstance(np, NumberObject): continue - + for p in pages: if np.getObject() == p.pagedata.getObject(): pageno = p.id - + if pageno != None: nd[NameObject('/Page')] = NumberObject(pageno) else: raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],)) - + def _associate_bookmarks_to_pages(self, pages, bookmarks=None): if bookmarks == None: bookmarks = self.bookmarks @@ -443,35 +443,35 @@ class PdfFileMerger(object): if isinstance(b, list): self._associate_bookmarks_to_pages(pages, b) continue - + pageno = None bp = b['/Page'] - + if isinstance(bp, NumberObject): continue - + for p in pages: if bp.getObject() == p.pagedata.getObject(): pageno = p.id - + if pageno != None: b[NameObject('/Page')] = NumberObject(pageno) else: raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],)) - + def findBookmark(self, bookmark, root=None): - if root == None: - root = self.bookmarks - - for i, b in enumerate(root): - if isinstance(b, list): - res = self.findBookmark(bookmark, b) - if res: - return [i] + res - elif b == bookmark or b['/Title'] == bookmark: - return [i] - - return None + if root == None: + root = self.bookmarks + + for i, b in enumerate(root): + if isinstance(b, list): + res = self.findBookmark(bookmark, b) + if res: + return [i] + res + elif b == bookmark or b['/Title'] == bookmark: + return [i] + + return None def addBookmark(self, title, pagenum, parent=None): """ @@ -483,28 +483,27 @@ class PdfFileMerger(object): bookmarks. """ if parent == None: - iloc = [len(self.bookmarks)-1] + iloc = [len(self.bookmarks)-1] elif isinstance(parent, list): - iloc = parent + iloc = parent else: - iloc = self.findBookmark(parent) - + iloc = self.findBookmark(parent) + dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826)) - + if parent == None: - self.bookmarks.append(dest) + self.bookmarks.append(dest) else: - bmparent = self.bookmarks - for i in iloc[:-1]: - bmparent = bmparent[i] - npos = iloc[-1]+1 - if npos < len(bmparent) and isinstance(bmparent[npos], list): - bmparent[npos].append(dest) - else: - bmparent.insert(npos, [dest]) + bmparent = self.bookmarks + for i in iloc[:-1]: + bmparent = bmparent[i] + npos = iloc[-1]+1 + if npos < len(bmparent) and isinstance(bmparent[npos], list): + bmparent[npos].append(dest) + else: + bmparent.insert(npos, [dest]) return dest - - + def addNamedDestination(self, title, pagenum): """ Add a destination to the output. @@ -512,7 +511,7 @@ class PdfFileMerger(object): :param str title: Title to use :param int pagenum: Page number this destination points at. """ - + dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826)) self.named_dests.append(dest) @@ -523,12 +522,12 @@ class OutlinesObject(list): self.tree = tree self.pdf = pdf self.parent = parent - + def remove(self, index): obj = self[index] del self[index] self.tree.removeChild(obj) - + def add(self, title, pagenum): pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum] action = DictionaryObject() @@ -547,7 +546,7 @@ class OutlinesObject(list): self.pdf._addObject(bookmark) self.tree.addChild(bookmark) - + def removeAll(self): for child in [x for x in self.tree.children()]: self.tree.removeChild(child) diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py b/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py index 2da762a..ce96ec5 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/pagerange.py @@ -8,7 +8,7 @@ see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE """ import re -from .utils import Str +from .utils import isString _INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0". PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE) @@ -32,11 +32,11 @@ PAGE_RANGE_HELP = """Remember, page indices start with zero. ::-1 all pages in reverse order. """ - + class PageRange(object): - """ + """ A slice-like representation of a range of page indices, - i.e. page numbers, only starting at zero. + i.e. page numbers, only starting at zero. The syntax is like what you would put between brackets [ ]. The slice is one of the few Python types that can't be subclassed, but this class converts to and from slices, and allows similar use. @@ -46,7 +46,7 @@ class PageRange(object): o str() and repr() allow printing. o indices(n) is like slice.indices(n). """ - + def __init__(self, arg): """ Initialize with either a slice -- giving the equivalent page range, @@ -67,8 +67,8 @@ class PageRange(object): if isinstance(arg, PageRange): self._slice = arg.to_slice() return - - m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg) + + m = isString(arg) and re.match(PAGE_RANGE_RE, arg) if not m: raise ParseError(arg) elif m.group(2): @@ -77,25 +77,25 @@ class PageRange(object): stop = start + 1 if start != -1 else None self._slice = slice(start, stop) else: - self._slice = slice(*[int(g) if g else None + self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)]) - + # Just formatting this when there is __doc__ for __init__ if __init__.__doc__: __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP) - + @staticmethod def valid(input): """ True if input is a valid initializer for a PageRange. """ return isinstance(input, slice) or \ isinstance(input, PageRange) or \ - (isinstance(input, Str) + (isString(input) and bool(re.match(PAGE_RANGE_RE, input))) def to_slice(self): """ Return the slice equivalent of this page range. """ return self._slice - + def __str__(self): """ A string like "1:2:3". """ s = self._slice @@ -127,7 +127,7 @@ def parse_filename_page_ranges(args): """ Given a list of filenames and page ranges, return a list of (filename, page_range) pairs. - First arg must be a filename; other ags are filenames, page-range + First arg must be a filename; other ags are filenames, page-range expressions, slice objects, or PageRange objects. A filename not followed by a page range indicates all pages of the file. """ @@ -146,7 +146,7 @@ def parse_filename_page_ranges(args): # New filename or end of list--do all of the previous file? if pdf_filename and not did_page_range: pairs.append( (pdf_filename, PAGE_RANGE_ALL) ) - + pdf_filename = arg did_page_range = False return pairs diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py index 2e53247..5522e4b 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py @@ -63,7 +63,7 @@ import warnings import codecs from .generic import * from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList -from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning +from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning if version_info < ( 2, 4 ): from sets import ImmutableSet as frozenset @@ -74,6 +74,7 @@ else: from hashlib import md5 import uuid + class PdfFileWriter(object): """ This class supports writing PDF files out, given pages produced by another @@ -228,6 +229,157 @@ class PdfFileWriter(object): NameObject("/OpenAction"): self._addObject(js) }) + def addAttachment(self, fname, fdata): + """ + Embed a file inside the PDF. + + :param str fname: The filename to display. + :param str fdata: The data in the file. + + Reference: + https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf + Section 7.11.3 + """ + + # We need 3 entries: + # * The file's data + # * The /Filespec entry + # * The file's name, which goes in the Catalog + + + # The entry for the file + """ Sample: + 8 0 obj + << + /Length 12 + /Type /EmbeddedFile + >> + stream + Hello world! + endstream + endobj + """ + file_entry = DecodedStreamObject() + file_entry.setData(fdata) + file_entry.update({ + NameObject("/Type"): NameObject("/EmbeddedFile") + }) + + # The Filespec entry + """ Sample: + 7 0 obj + << + /Type /Filespec + /F (hello.txt) + /EF << /F 8 0 R >> + >> + """ + efEntry = DictionaryObject() + efEntry.update({ NameObject("/F"):file_entry }) + + filespec = DictionaryObject() + filespec.update({ + NameObject("/Type"): NameObject("/Filespec"), + NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject + NameObject("/EF"): efEntry + }) + + # Then create the entry for the root, as it needs a reference to the Filespec + """ Sample: + 1 0 obj + << + /Type /Catalog + /Outlines 2 0 R + /Pages 3 0 R + /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> + >> + endobj + + """ + embeddedFilesNamesDictionary = DictionaryObject() + embeddedFilesNamesDictionary.update({ + NameObject("/Names"): ArrayObject([createStringObject(fname), filespec]) + }) + + embeddedFilesDictionary = DictionaryObject() + embeddedFilesDictionary.update({ + NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary + }) + # Update the root + self._root_object.update({ + NameObject("/Names"): embeddedFilesDictionary + }) + + def appendPagesFromReader(self, reader, after_page_append=None): + """ + Copy pages from reader to writer. Includes an optional callback parameter + which is invoked after pages are appended to the writer. + + :param reader: a PdfFileReader object from which to copy page + annotations to this writer object. The writer's annots + will then be updated + :callback after_page_append (function): Callback function that is invoked after + each page is appended to the writer. Callback signature: + + :param writer_pageref (PDF page reference): Reference to the page + appended to the writer. + """ + # Get page count from writer and reader + reader_num_pages = reader.getNumPages() + writer_num_pages = self.getNumPages() + + # Copy pages from reader to writer + for rpagenum in range(0, reader_num_pages): + reader_page = reader.getPage(rpagenum) + self.addPage(reader_page) + writer_page = self.getPage(writer_num_pages+rpagenum) + # Trigger callback, pass writer page as parameter + if callable(after_page_append): after_page_append(writer_page) + + def updatePageFormFieldValues(self, page, fields): + ''' + Update the form field values for a given page from a fields dictionary. + Copy field texts and values from fields to page. + + :param page: Page reference from PDF writer where the annotations + and field data will be updated. + :param fields: a Python dictionary of field names (/T) and text + values (/V) + ''' + # Iterate through pages, update field values + for j in range(0, len(page['/Annots'])): + writer_annot = page['/Annots'][j].getObject() + for field in fields: + if writer_annot.get('/T') == field: + writer_annot.update({ + NameObject("/V"): TextStringObject(fields[field]) + }) + + def cloneReaderDocumentRoot(self, reader): + ''' + Copy the reader document root to the writer. + + :param reader: PdfFileReader from the document root should be copied. + :callback after_page_append + ''' + self._root_object = reader.trailer['/Root'] + + def cloneDocumentFromReader(self, reader, after_page_append=None): + ''' + Create a copy (clone) of a document from a PDF file reader + + :param reader: PDF file reader instance from which the clone + should be created. + :callback after_page_append (function): Callback function that is invoked after + each page is appended to the writer. Signature includes a reference to the + appended page (delegates to appendPagesFromReader). Callback signature: + + :param writer_pageref (PDF page reference): Reference to the page just + appended to the document. + ''' + self.cloneReaderDocumentRoot(reader) + self.appendPagesFromReader(reader, after_page_append) + def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True): """ Encrypt this PDF file with the PDF Standard encryption handler. @@ -516,7 +668,6 @@ class PdfFileWriter(object): return bookmarkRef - def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args): """ Add a bookmark to this PDF file. @@ -553,7 +704,6 @@ class PdfFileWriter(object): if parent == None: parent = outlineRef - bookmark = TreeObject() bookmark.update({ @@ -759,7 +909,7 @@ class PdfFileWriter(object): else: borderArr = [NumberObject(0)] * 3 - if isinstance(rect, Str): + if isString(rect): rect = NameObject(rect) elif isinstance(rect, RectangleObject): pass @@ -871,6 +1021,7 @@ class PdfFileWriter(object): """Read and write property accessing the :meth:`getPageMode()` and :meth:`setPageMode()` methods.""" + class PdfFileReader(object): """ Initializes a PdfFileReader object. This operation can take some time, as @@ -904,9 +1055,10 @@ class PdfFileReader(object): self.flattenedPages = None self.resolvedObjects = {} self.xrefIndex = 0 + self._pageId2Num = None # map page IndirectRef number to Page Number if hasattr(stream, 'mode') and 'b' not in stream.mode: warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning) - if type(stream) in (string_type, str): + if isString(stream): fileobj = open(stream, 'rb') stream = BytesIO(b_(fileobj.read())) fileobj.close() @@ -973,6 +1125,7 @@ class PdfFileReader(object): if self.isEncrypted: try: self._override_encryption = True + self.decrypt('') return self.trailer["/Root"]["/Pages"]["/Count"] except: raise utils.PdfReadError("File has not been decrypted") @@ -1160,7 +1313,14 @@ class PdfFileReader(object): # get the outline dictionary and named destinations if "/Outlines" in catalog: - lines = catalog["/Outlines"] + try: + lines = catalog["/Outlines"] + except utils.PdfReadError: + # this occurs if the /Outlines object reference is incorrect + # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf + # so continue to load the file without the Bookmarks + return outlines + if "/First" in lines: node = lines["/First"] self._namedDests = self.getNamedDestinations() @@ -1187,6 +1347,49 @@ class PdfFileReader(object): return outlines + def _getPageNumberByIndirect(self, indirectRef): + """Generate _pageId2Num""" + if self._pageId2Num is None: + id2num = {} + for i, x in enumerate(self.pages): + id2num[x.indirectRef.idnum] = i + self._pageId2Num = id2num + + if isinstance(indirectRef, int): + idnum = indirectRef + else: + idnum = indirectRef.idnum + + ret = self._pageId2Num.get(idnum, -1) + return ret + + def getPageNumber(self, page): + """ + Retrieve page number of a given PageObject + + :param PageObject page: The page to get page number. Should be + an instance of :class:`PageObject` + :return: the page number or -1 if page not found + :rtype: int + """ + indirectRef = page.indirectRef + ret = self._getPageNumberByIndirect(indirectRef) + return ret + + def getDestinationPageNumber(self, destination): + """ + Retrieve page number of a given Destination object + + :param Destination destination: The destination to get page number. + Should be an instance of + :class:`Destination` + :return: the page number or -1 if page not found + :rtype: int + """ + indirectRef = destination.page + ret = self._getPageNumberByIndirect(indirectRef) + return ret + def _buildDestination(self, title, array): page, typ = array[0:2] array = array[2:] @@ -1210,7 +1413,7 @@ class PdfFileReader(object): if dest: if isinstance(dest, ArrayObject): outline = self._buildDestination(title, dest) - elif isinstance(dest, Str) and dest in self._namedDests: + elif isString(dest) and dest in self._namedDests: outline = self._namedDests[dest] outline[NameObject("/Title")] = title else: @@ -1310,6 +1513,8 @@ class PdfFileReader(object): assert idx < objStm['/N'] streamData = BytesIO(b_(objStm.getData())) for i in range(objStm['/N']): + readNonWhitespace(streamData) + streamData.seek(-1, 1) objnum = NumberObject.readFromStream(streamData) readNonWhitespace(streamData) streamData.seek(-1, 1) @@ -1347,7 +1552,6 @@ class PdfFileReader(object): if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.") return NullObject() - def getObject(self, indirectReference): debug = False if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation)) @@ -1470,7 +1674,7 @@ class PdfFileReader(object): startxref = int(line) except ValueError: # 'startxref' may be on the same line as the location - if not line.startswith("startxref"): + if not line.startswith(b_("startxref")): raise utils.PdfReadError("startxref not found") startxref = int(line[9:].strip()) warnings.warn("startxref on same line as offset") @@ -1580,6 +1784,7 @@ class PdfFileReader(object): assert len(entrySizes) >= 3 if self.strict and len(entrySizes) > 3: raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes) + def getEntry(i): # Reads the correct number of bytes for each entry. See the # discussion of the W parameter in PDF spec table 17. @@ -1664,8 +1869,7 @@ class PdfFileReader(object): if found: continue # no xref table found at specified location - assert False - break + raise utils.PdfReadError("Could not find xref table at specified location") #if not zero-indexed, verify that the table is correct; change it if necessary if self.xrefIndex and not self.strict: loc = stream.tell() @@ -1683,7 +1887,6 @@ class PdfFileReader(object): #if not, then either it's just plain wrong, or the non-zero-index is actually correct stream.seek(loc, 0) #return to where it was - def _zeroXref(self, generation): self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) ) @@ -1700,8 +1903,13 @@ class PdfFileReader(object): if debug: print(">>readNextEndLine") line = b_("") while True: + # Prevent infinite loops in malformed PDFs + if stream.tell() == 0: + raise utils.PdfReadError("Could not read malformed PDF file") x = stream.read(1) if debug: print((" x:", x, "%x"%ord(x))) + if stream.tell() < 2: + raise utils.PdfReadError("EOL marker not found") stream.seek(-2, 1) if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR crlf = False @@ -1713,6 +1921,8 @@ class PdfFileReader(object): if x == b_('\n') or x == b_('\r'): # account for CR+LF stream.seek(-1, 1) crlf = True + if stream.tell() < 2: + raise utils.PdfReadError("EOL marker not found") stream.seek(-2, 1) stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1 break @@ -1827,14 +2037,17 @@ def getRectangle(self, name, defaults): setRectangle(self, name, retval) return retval + def setRectangle(self, name, value): if not isinstance(name, NameObject): name = NameObject(name) self[name] = value + def deleteRectangle(self, name): del self[name] + def createRectangleAccessor(name, fallback): return \ property( @@ -1843,6 +2056,7 @@ def createRectangleAccessor(name, fallback): lambda self: deleteRectangle(self, name) ) + class PageObject(DictionaryObject): """ This class represents a single page within a PDF file. Typically this @@ -2374,6 +2588,7 @@ class PageObject(DictionaryObject): for i in operands[0]: if isinstance(i, TextStringObject): text += i + text += "\n" return text mediaBox = createRectangleAccessor("/MediaBox", ()) @@ -2412,6 +2627,7 @@ class PageObject(DictionaryObject): page's creator. """ + class ContentStream(DecodedStreamObject): def __init__(self, stream, pdf): self.pdf = pdf @@ -2437,25 +2653,25 @@ class ContentStream(DecodedStreamObject): if peek == b_('') or ord_(peek) == 0: break stream.seek(-1, 1) - if peek.isalpha() or peek == "'" or peek == '"': + if peek.isalpha() or peek == b_("'") or peek == b_('"'): operator = utils.readUntilRegex(stream, NameObject.delimiterPattern, True) - if operator == "BI": + if operator == b_("BI"): # begin inline image - a completely different parsing # mechanism is required, of course... thanks buddy... assert operands == [] ii = self._readInlineImage(stream) - self.operations.append((ii, "INLINE IMAGE")) + self.operations.append((ii, b_("INLINE IMAGE"))) else: self.operations.append((operands, operator)) operands = [] - elif peek == '%': + elif peek == b_('%'): # If we encounter a comment in the content stream, we have to # handle it here. Typically, readObject will handle # encountering a comment -- but readObject assumes that # following the comment must be the object we're trying to # read. In this case, it could be an operator instead. - while peek not in ('\r', '\n'): + while peek not in (b_('\r'), b_('\n')): peek = stream.read(1) else: operands.append(readObject(stream, None)) @@ -2467,7 +2683,7 @@ class ContentStream(DecodedStreamObject): while True: tok = readNonWhitespace(stream) stream.seek(-1, 1) - if tok == "I": + if tok == b_("I"): # "ID" - begin of image data break key = readObject(stream, self.pdf) @@ -2477,28 +2693,32 @@ class ContentStream(DecodedStreamObject): settings[key] = value # left at beginning of ID tmp = stream.read(3) - assert tmp[:2] == "ID" - data = "" + assert tmp[:2] == b_("ID") + data = b_("") while True: + # Read the inline image, while checking for EI (End Image) operator. tok = stream.read(1) - if tok == "E": + if tok == b_("E"): # Check for End Image - next1 = stream.read(1) - if next1 == "I": - next2 = readNonWhitespace(stream) - if next2 == 'Q': + tok2 = stream.read(1) + if tok2 == b_("I"): + # Sometimes that data will contain EI, so check for the Q operator. + tok3 = stream.read(1) + info = tok + tok2 + while tok3 in utils.WHITESPACES: + info += tok3 + tok3 = stream.read(1) + if tok3 == b_("Q"): stream.seek(-1, 1) break else: - stream.seek(-2, 1) - data += tok + stream.seek(-1,1) + data += info else: stream.seek(-1, 1) data += tok else: data += tok - x = readNonWhitespace(stream) - stream.seek(-1, 1) return {"settings": settings, "data": data} def _getData(self): @@ -2525,6 +2745,7 @@ class ContentStream(DecodedStreamObject): _data = property(_getData, _setData) + class DocumentInformation(DictionaryObject): """ A class representing the basic document metadata provided in a PDF File. @@ -2588,6 +2809,7 @@ class DocumentInformation(DictionaryObject): producer_raw = property(lambda self: self.get("/Producer")) """The "raw" version of producer; can return a ``ByteStringObject``.""" + def convertToInt(d, size): if size > 8: raise utils.PdfReadError("invalid size in convertToInt") @@ -2600,6 +2822,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \ b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \ b_('\xa9\xfe\x64\x53\x69\x7a') + # Implementation of algorithm 3.2 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True): @@ -2643,6 +2866,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr # entry. return md5_hash[:keylen] + # Implementation of algorithm 3.3 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg33(owner_pwd, user_pwd, rev, keylen): @@ -2670,6 +2894,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen): # the /O entry in the encryption dictionary. return val + # Steps 1-4 of algorithm 3.3 def _alg33_1(password, rev, keylen): # 1. Pad or truncate the owner password string as described in step 1 of @@ -2692,6 +2917,7 @@ def _alg33_1(password, rev, keylen): key = md5_hash[:keylen] return key + # Implementation of algorithm 3.4 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg34(password, owner_entry, p_entry, id1_entry): @@ -2706,6 +2932,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry): # encryption dictionary. return U, key + # Implementation of algorithm 3.4 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt): diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/utils.py b/Shared/lib/python3.4/site-packages/PyPDF2/utils.py index c2a7abf..718a875 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/utils.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/utils.py @@ -33,25 +33,35 @@ __author_email__ = "biziqe@mathieu.fenniak.net" import sys -# "Str" maintains compatibility with Python 2.x. -# The next line is obfuscated like this so 2to3 won't change it. + try: import __builtin__ as builtins except ImportError: # Py3 import builtins -if sys.version_info[0] < 3: - string_type = unicode - bytes_type = str - int_types = (int, long) -else: - string_type = str - bytes_type = bytes - int_types = (int,) +xrange_fn = getattr(builtins, "xrange", range) +_basestring = getattr(builtins, "basestring", str) -Xrange = getattr(builtins, "xrange", range) -Str = getattr(builtins, "basestring", str) +bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X +string_type = getattr(builtins, "unicode", str) +int_types = (int, long) if sys.version_info[0] < 3 else (int,) + + +# Make basic type tests more consistent +def isString(s): + """Test if arg is a string. Compatible with Python 2 and 3.""" + return isinstance(s, _basestring) + + +def isInt(n): + """Test if arg is an int. Compatible with Python 2 and 3.""" + return isinstance(n, int_types) + + +def isBytes(b): + """Test if arg is a bytes instance. Compatible with Python 2 and 3.""" + return isinstance(b, bytes_type) #custom implementation of warnings.formatwarning @@ -59,6 +69,7 @@ def formatWarning(message, category, filename, lineno, line=None): file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno) + def readUntilWhitespace(stream, maxchars=None): """ Reads non-whitespace characters and returns them. @@ -74,6 +85,7 @@ def readUntilWhitespace(stream, maxchars=None): break return txt + def readNonWhitespace(stream): """ Finds and reads the next non-whitespace character (ignores whitespace). @@ -83,6 +95,7 @@ def readNonWhitespace(stream): tok = stream.read(1) return tok + def skipOverWhitespace(stream): """ Similar to readNonWhitespace, but returns a Boolean if more than @@ -95,6 +108,7 @@ def skipOverWhitespace(stream): cnt+=1 return (cnt > 1) + def skipOverComment(stream): tok = stream.read(1) stream.seek(-1, 1) @@ -102,6 +116,7 @@ def skipOverComment(stream): while tok not in (b_('\n'), b_('\r')): tok = stream.read(1) + def readUntilRegex(stream, regex, ignore_eof=False): """ Reads until the regular expression pattern matched (ignore the match) @@ -125,6 +140,7 @@ def readUntilRegex(stream, regex, ignore_eof=False): name += tok return name + class ConvertFunctionsToVirtualList(object): def __init__(self, lengthFunction, getFunction): self.lengthFunction = lengthFunction @@ -135,10 +151,10 @@ class ConvertFunctionsToVirtualList(object): def __getitem__(self, index): if isinstance(index, slice): - indices = Xrange(*index.indices(len(self))) + indices = xrange_fn(*index.indices(len(self))) cls = type(self) return cls(indices.__len__, lambda idx: self[indices[idx]]) - if not isinstance(index, int_types): + if not isInt(index): raise TypeError("sequence indices must be integers") len_self = len(self) if index < 0: @@ -148,6 +164,7 @@ class ConvertFunctionsToVirtualList(object): raise IndexError("sequence index out of range") return self.getFunction(index) + def RC4_encrypt(key, plaintext): S = [i for i in range(256)] j = 0 @@ -164,12 +181,14 @@ def RC4_encrypt(key, plaintext): retval += b_(chr(ord_(plaintext[x]) ^ t)) return retval + def matrixMultiply(a, b): return [[sum([float(i)*float(j) for i, j in zip(row, col)] ) for col in zip(*b)] for row in a] + def markLocation(stream): """Creates text file showing current location in context.""" # Mainly for debugging @@ -182,18 +201,23 @@ def markLocation(stream): outputDoc.close() stream.seek(-RADIUS, 1) + class PyPdfError(Exception): pass + class PdfReadError(PyPdfError): pass + class PageSizeNotDefinedError(PyPdfError): pass + class PdfReadWarning(UserWarning): pass + class PdfStreamError(PdfReadError): pass @@ -203,6 +227,7 @@ if sys.version_info[0] < 3: return s else: B_CACHE = {} + def b_(s): bc = B_CACHE if s in bc: @@ -214,6 +239,8 @@ else: if len(s) < 2: bc[s] = r return r + + def u_(s): if sys.version_info[0] < 3: return unicode(s, 'unicode_escape') @@ -230,24 +257,28 @@ def str_(b): else: return b + def ord_(b): if sys.version_info[0] < 3 or type(b) == str: return ord(b) else: return b + def chr_(c): if sys.version_info[0] < 3: return c else: return chr(c) + def barray(b): if sys.version_info[0] < 3: return b else: return bytearray(b) + def hexencode(b): if sys.version_info[0] < 3: return b.encode('hex') @@ -256,6 +287,7 @@ def hexencode(b): coder = codecs.getencoder('hex_codec') return coder(b)[0] + def hexStr(num): return hex(num).replace('L', '') diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py b/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py index c58c592..7ba62f0 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/xmp.py @@ -50,6 +50,7 @@ iso8601 = re.compile(""" )? """, re.VERBOSE) + class XmpInformation(PdfObject): """ An object that represents Adobe XMP metadata. @@ -355,5 +356,3 @@ class XmpInformation(PdfObject): :return: a dictionary of key/value items for custom metadata properties. :rtype: dict """ - -