update pypdf2
This commit is contained in:
parent
b8b1fe89bd
commit
66205d529e
19 changed files with 626 additions and 315 deletions
|
@ -1,32 +0,0 @@
|
|||
Metadata-Version: 1.1
|
||||
Name: PyPDF2
|
||||
Version: 1.23
|
||||
Summary: PDF toolkit
|
||||
Home-page: http://mstamy2.github.com/PyPDF2
|
||||
Author: Phaseit, Inc.
|
||||
Author-email: PyPDF2@phaseit.net
|
||||
License: UNKNOWN
|
||||
Description:
|
||||
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||
|
||||
- extracting document information (title, author, ...)
|
||||
- splitting documents page by page
|
||||
- merging documents page by page
|
||||
- cropping pages
|
||||
- merging multiple pages into a single page
|
||||
- encrypting and decrypting PDF files
|
||||
- and more!
|
||||
|
||||
By being Pure-Python, it should run on any Python platform without any
|
||||
dependencies on external libraries. It can also work entirely on StringIO
|
||||
objects rather than file streams, allowing for PDF manipulation in memory.
|
||||
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
||||
|
||||
Platform: UNKNOWN
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: BSD License
|
||||
Classifier: Programming Language :: Python :: 2
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
@ -1,15 +0,0 @@
|
|||
CHANGELOG
|
||||
MANIFEST.in
|
||||
PyPDF2/__init__.py
|
||||
PyPDF2/_version.py
|
||||
PyPDF2/filters.py
|
||||
PyPDF2/generic.py
|
||||
PyPDF2/merger.py
|
||||
PyPDF2/pagerange.py
|
||||
PyPDF2/pdf.py
|
||||
PyPDF2/utils.py
|
||||
PyPDF2/xmp.py
|
||||
PyPDF2.egg-info/PKG-INFO
|
||||
PyPDF2.egg-info/SOURCES.txt
|
||||
PyPDF2.egg-info/dependency_links.txt
|
||||
PyPDF2.egg-info/top_level.txt
|
|
@ -1,23 +0,0 @@
|
|||
../PyPDF2/filters.py
|
||||
../PyPDF2/generic.py
|
||||
../PyPDF2/merger.py
|
||||
../PyPDF2/pagerange.py
|
||||
../PyPDF2/pdf.py
|
||||
../PyPDF2/utils.py
|
||||
../PyPDF2/xmp.py
|
||||
../PyPDF2/_version.py
|
||||
../PyPDF2/__init__.py
|
||||
../PyPDF2/__pycache__/filters.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/generic.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/merger.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/pagerange.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/pdf.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/utils.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/xmp.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/_version.cpython-34.pyc
|
||||
../PyPDF2/__pycache__/__init__.cpython-34.pyc
|
||||
./
|
||||
top_level.txt
|
||||
dependency_links.txt
|
||||
PKG-INFO
|
||||
SOURCES.txt
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||
|
||||
- extracting document information (title, author, ...)
|
||||
- splitting documents page by page
|
||||
- merging documents page by page
|
||||
- cropping pages
|
||||
- merging multiple pages into a single page
|
||||
- encrypting and decrypting PDF files
|
||||
- and more!
|
||||
|
||||
By being Pure-Python, it should run on any Python platform without any
|
||||
dependencies on external libraries. It can also work entirely on StringIO
|
||||
objects rather than file streams, allowing for PDF manipulation in memory.
|
||||
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
pip
|
|
@ -0,0 +1,34 @@
|
|||
Metadata-Version: 2.0
|
||||
Name: PyPDF2
|
||||
Version: 1.25.1
|
||||
Summary: PDF toolkit
|
||||
Home-page: http://mstamy2.github.com/PyPDF2
|
||||
Author: Phaseit, Inc.
|
||||
Author-email: PyPDF2@phaseit.net
|
||||
License: UNKNOWN
|
||||
Platform: UNKNOWN
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: BSD License
|
||||
Classifier: Programming Language :: Python :: 2
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
|
||||
|
||||
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||
|
||||
- extracting document information (title, author, ...)
|
||||
- splitting documents page by page
|
||||
- merging documents page by page
|
||||
- cropping pages
|
||||
- merging multiple pages into a single page
|
||||
- encrypting and decrypting PDF files
|
||||
- and more!
|
||||
|
||||
By being Pure-Python, it should run on any Python platform without any
|
||||
dependencies on external libraries. It can also work entirely on StringIO
|
||||
objects rather than file streams, allowing for PDF manipulation in memory.
|
||||
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
||||
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
PyPDF2/__init__.py,sha256=ugkP-3fEFZZ2-54PmYpjJ5CISEPD5W8TikZlloOJZ5M,210
|
||||
PyPDF2/_version.py,sha256=ufPT1c1QzU2MdIAGUZ89UoQfl6t3IJdOjhMyLVhsDmQ,23
|
||||
PyPDF2/filters.py,sha256=U4KQ7fJX129ePxoff-6-009e9kCWlj8_d2ipnm5QDG4,13167
|
||||
PyPDF2/generic.py,sha256=bJ3e3PpqJCvTHrQ3IH3VEXMh1RWVqiCh9T1IcmkBuAo,45129
|
||||
PyPDF2/merger.py,sha256=2Cz4QaB8R-Zm3V5P2rI-QYdqMZlN4geaAtNfrPbcTM4,21387
|
||||
PyPDF2/pagerange.py,sha256=AEMerbVjzXE55sJ2EYZzBgH1Xt4NiUsHaiycoNaW8Ys,5534
|
||||
PyPDF2/pdf.py,sha256=ceuZWSZIupSbzEzw6QrbNmN9D8PrdM6dh8zHSB9Rg2o,124907
|
||||
PyPDF2/utils.py,sha256=-ZQky5qa4gsO0zprA8V_E5sTNRBSa_ungvxvxjdHr64,7833
|
||||
PyPDF2/xmp.py,sha256=vdjDUAMCqb7-AhkuNaqCanviPHMpuJ-5adY8Kxe5jUc,13639
|
||||
PyPDF2-1.25.1.dist-info/DESCRIPTION.rst,sha256=mCiWyCHYtsbQ22O_f2FbbD8CjW1GMfwvbn67J_THZ5M,600
|
||||
PyPDF2-1.25.1.dist-info/METADATA,sha256=lGFpbQOrG5_oOYPi4GlzoQT4Lyj3eCvNEHIomSf4JsU,1174
|
||||
PyPDF2-1.25.1.dist-info/RECORD,,
|
||||
PyPDF2-1.25.1.dist-info/WHEEL,sha256=bfpjj1zBtYtglW1hWtnRCmhEcEV3TH8magB_ZQeGgSg,93
|
||||
PyPDF2-1.25.1.dist-info/metadata.json,sha256=aVLfNzdnpxj8hyl12sDq-3IgfGH7t0g5gS2y6LPYtYE,692
|
||||
PyPDF2-1.25.1.dist-info/top_level.txt,sha256=BERWrwqdvKXaVKhpnMbtO6b11qPA-mBt2r9a0VPF-Ow,7
|
||||
/srv/openmedialibrary/platform/Shared/home/.local/lib/python3.5/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
PyPDF2/__pycache__/xmp.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/utils.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/pdf.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/merger.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/__init__.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/generic.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/filters.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/pagerange.cpython-35.pyc,,
|
||||
PyPDF2/__pycache__/_version.cpython-35.pyc,,
|
|
@ -0,0 +1,5 @@
|
|||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.26.0)
|
||||
Root-Is-Purelib: true
|
||||
Tag: cp35-none-any
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", "Operating System :: OS Independent", "Topic :: Software Development :: Libraries :: Python Modules"], "extensions": {"python.details": {"contacts": [{"email": "PyPDF2@phaseit.net", "name": "Phaseit, Inc.", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://mstamy2.github.com/PyPDF2"}}}, "generator": "bdist_wheel (0.26.0)", "metadata_version": "2.0", "name": "PyPDF2", "summary": "PDF toolkit", "version": "1.25.1"}
|
|
@ -1,2 +1 @@
|
|||
__version__ = '1.23'
|
||||
|
||||
__version__ = '1.25.1'
|
||||
|
|
|
@ -40,28 +40,35 @@ if version_info < ( 3, 0 ):
|
|||
from cStringIO import StringIO
|
||||
else:
|
||||
from io import StringIO
|
||||
import struct
|
||||
|
||||
try:
|
||||
import zlib
|
||||
|
||||
def decompress(data):
|
||||
return zlib.decompress(data)
|
||||
|
||||
def compress(data):
|
||||
return zlib.compress(data)
|
||||
|
||||
except ImportError:
|
||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||
# library from the .NET framework. (IronPython only)
|
||||
import System
|
||||
from System import IO, Collections, Array
|
||||
|
||||
def _string_to_bytearr(buf):
    """Copy ``buf`` into a newly created .NET ``System.Byte`` array.

    Helper for the IronPython fallback that is used when ``zlib`` cannot
    be imported.

    :param buf: sequence of single characters; each one is converted
        with ``ord`` before being stored.
    :return: a ``System.Byte`` array with ``len(buf)`` elements.
    """
    byte_array = Array.CreateInstance(System.Byte, len(buf))
    for position, char in enumerate(buf):
        byte_array[position] = ord(char)
    return byte_array
|
||||
|
||||
def _bytearr_to_string(bytes):
    """Convert an indexable byte array into a string, one ``chr`` per element.

    Helper for the IronPython fallback that is used when ``zlib`` cannot
    be imported.

    :param bytes: object exposing a ``Length`` attribute and integer
        indexing (the parameter name shadows the builtin; it is kept
        unchanged so existing callers are unaffected).
    :return: string made of ``chr(bytes[i])`` for every index below
        ``bytes.Length``; the empty string when ``Length`` is 0.
    """
    # Join once instead of growing the result with ``+=`` on every
    # element, which is quadratic on interpreters that do not optimise
    # in-place string concatenation.
    return "".join([chr(bytes[i]) for i in range(bytes.Length)])
|
||||
|
||||
def _read_bytes(stream):
|
||||
ms = IO.MemoryStream()
|
||||
buf = Array.CreateInstance(System.Byte, 2048)
|
||||
|
@ -74,6 +81,7 @@ except ImportError:
|
|||
retval = ms.ToArray()
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
def decompress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
|
@ -84,6 +92,7 @@ except ImportError:
|
|||
retval = _bytearr_to_string(bytes)
|
||||
gz.Close()
|
||||
return retval
|
||||
|
||||
def compress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
|
@ -144,6 +153,7 @@ class FlateDecode(object):
|
|||
return compress(data)
|
||||
encode = staticmethod(encode)
|
||||
|
||||
|
||||
class ASCIIHexDecode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
|
@ -165,6 +175,7 @@ class ASCIIHexDecode(object):
|
|||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
class LZWDecode(object):
|
||||
"""Taken from:
|
||||
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
|
||||
|
@ -185,7 +196,6 @@ class LZWDecode(object):
|
|||
self.dictlen=258
|
||||
self.bitspercode=9
|
||||
|
||||
|
||||
def nextCode(self):
|
||||
fillbits=self.bitspercode
|
||||
value=0
|
||||
|
@ -240,14 +250,14 @@ class LZWDecode(object):
|
|||
self.bitspercode+=1
|
||||
return baos
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def decode(data,decodeParams=None):
|
||||
return LZWDecode.decoder(data).decode()
|
||||
|
||||
|
||||
class ASCII85Decode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
if version_info < ( 3, 0 ):
|
||||
retval = ""
|
||||
group = []
|
||||
x = 0
|
||||
|
@ -297,8 +307,31 @@ class ASCII85Decode(object):
|
|||
group = []
|
||||
x += 1
|
||||
return retval
|
||||
else:
|
||||
if isinstance(data, str):
|
||||
data = data.encode('ascii')
|
||||
n = b = 0
|
||||
out = bytearray()
|
||||
for c in data:
|
||||
if ord('!') <= c and c <= ord('u'):
|
||||
n += 1
|
||||
b = b*85+(c-33)
|
||||
if n == 5:
|
||||
out += struct.pack(b'>L',b)
|
||||
n = b = 0
|
||||
elif c == ord('z'):
|
||||
assert n == 0
|
||||
out += b'\0\0\0\0'
|
||||
elif c == ord('~'):
|
||||
if n:
|
||||
for _ in range(5-n):
|
||||
b = b*85+84
|
||||
out += struct.pack(b'>L',b)[:n-1]
|
||||
break
|
||||
return bytes(out)
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
def decodeStreamData(stream):
|
||||
from .generic import NameObject
|
||||
filters = stream.get("/Filter", ())
|
||||
|
@ -306,14 +339,16 @@ def decodeStreamData(stream):
|
|||
# we have a single filter instance
|
||||
filters = (filters,)
|
||||
data = stream._data
|
||||
# If there is no data to decode, do not attempt to decode it.
|
||||
if data:
|
||||
for filterType in filters:
|
||||
if filterType == "/FlateDecode":
|
||||
if filterType == "/FlateDecode" or filterType == "/Fl":
|
||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCIIHexDecode":
|
||||
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
|
||||
data = ASCIIHexDecode.decode(data)
|
||||
elif filterType == "/LZWDecode":
|
||||
elif filterType == "/LZWDecode" or filterType == "/LZW":
|
||||
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCII85Decode":
|
||||
elif filterType == "/ASCII85Decode" or filterType == "/A85":
|
||||
data = ASCII85Decode.decode(data)
|
||||
elif filterType == "/Crypt":
|
||||
decodeParams = stream.get("/DecodeParams", {})
|
||||
|
|
|
@ -43,11 +43,14 @@ from . import filters
|
|||
from . import utils
|
||||
import decimal
|
||||
import codecs
|
||||
import sys
|
||||
#import debugging
|
||||
|
||||
ObjectPrefix = b_('/<[tf(n%')
|
||||
NumberSigns = b_('+-')
|
||||
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
|
||||
|
||||
|
||||
def readObject(stream, pdf):
|
||||
tok = stream.read(1)
|
||||
stream.seek(-1, 1) # reset to start
|
||||
|
@ -94,6 +97,7 @@ def readObject(stream, pdf):
|
|||
else:
|
||||
return NumberObject.readFromStream(stream)
|
||||
|
||||
|
||||
class PdfObject(object):
|
||||
def getObject(self):
|
||||
"""Resolves indirect references."""
|
||||
|
@ -225,6 +229,7 @@ class FloatObject(decimal.Decimal, PdfObject):
|
|||
return decimal.Decimal.__new__(cls, utils.str_(value), context)
|
||||
except:
|
||||
return decimal.Decimal.__new__(cls, str(value))
|
||||
|
||||
def __repr__(self):
|
||||
if self == self.to_integral():
|
||||
return str(self.quantize(decimal.Decimal(1)))
|
||||
|
@ -244,7 +249,11 @@ class NumberObject(int, PdfObject):
|
|||
ByteDot = b_(".")
|
||||
|
||||
def __new__(cls, value):
|
||||
return int.__new__(cls, value)
|
||||
val = int(value)
|
||||
try:
|
||||
return int.__new__(cls, val)
|
||||
except OverflowError:
|
||||
return int.__new__(cls, 0)
|
||||
|
||||
def as_numeric(self):
|
||||
return int(b_(repr(self)))
|
||||
|
@ -253,16 +262,7 @@ class NumberObject(int, PdfObject):
|
|||
stream.write(b_(repr(self)))
|
||||
|
||||
def readFromStream(stream):
|
||||
num = b_("")
|
||||
while True:
|
||||
tok = stream.read(16)
|
||||
m = NumberObject.NumberPattern.search(tok)
|
||||
if m is not None:
|
||||
stream.seek(m.start() - len(tok), 1)
|
||||
num += tok[:m.start()]
|
||||
break
|
||||
|
||||
num += tok
|
||||
num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
|
||||
if num.find(NumberObject.ByteDot) != -1:
|
||||
return FloatObject(num)
|
||||
else:
|
||||
|
@ -345,13 +345,18 @@ def readStringFromStream(stream):
|
|||
tok = b_("\b")
|
||||
elif tok == b_("f"):
|
||||
tok = b_("\f")
|
||||
elif tok == b_("c"):
|
||||
tok = b_("\c")
|
||||
elif tok == b_("("):
|
||||
tok = b_("(")
|
||||
elif tok == b_(")"):
|
||||
tok = b_(")")
|
||||
elif tok == b_("/"):
|
||||
tok = b_("/")
|
||||
elif tok == b_("\\"):
|
||||
tok = b_("\\")
|
||||
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")):
|
||||
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["),
|
||||
b_("]"), b_("#"), b_("_"), b_("&"), b_('$')):
|
||||
# odd/unnecessary escape sequences we have encountered
|
||||
tok = b_(tok)
|
||||
elif tok.isdigit():
|
||||
|
@ -378,7 +383,7 @@ def readStringFromStream(stream):
|
|||
# line break was escaped:
|
||||
tok = b_('')
|
||||
else:
|
||||
raise utils.PdfReadError("Unexpected escaped string")
|
||||
raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
|
||||
txt += tok
|
||||
return createStringObject(txt)
|
||||
|
||||
|
@ -456,7 +461,7 @@ class TextStringObject(utils.string_type, PdfObject):
|
|||
|
||||
|
||||
class NameObject(str, PdfObject):
|
||||
delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
|
||||
delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
|
||||
surfix = b_("/")
|
||||
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
|
@ -468,11 +473,12 @@ class NameObject(str, PdfObject):
|
|||
name = stream.read(1)
|
||||
if name != NameObject.surfix:
|
||||
raise utils.PdfReadError("name read error")
|
||||
name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
|
||||
name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
|
||||
ignore_eof=True)
|
||||
if debug: print(name)
|
||||
try:
|
||||
return NameObject(name.decode('utf-8'))
|
||||
except UnicodeDecodeError as e:
|
||||
except (UnicodeEncodeError, UnicodeDecodeError) as e:
|
||||
# Name objects should represent irregular characters
|
||||
# with a '#' followed by the symbol's hex number
|
||||
if not pdf.strict:
|
||||
|
@ -630,6 +636,7 @@ class DictionaryObject(dict, PdfObject):
|
|||
return retval
|
||||
readFromStream = staticmethod(readFromStream)
|
||||
|
||||
|
||||
class TreeObject(DictionaryObject):
|
||||
def __init__(self):
|
||||
DictionaryObject.__init__(self)
|
||||
|
@ -726,7 +733,6 @@ class TreeObject(DictionaryObject):
|
|||
found = True
|
||||
break
|
||||
|
||||
|
||||
prevRef = curRef
|
||||
prev = cur
|
||||
if NameObject('/Next') in cur:
|
||||
|
@ -938,6 +944,7 @@ class RectangleObject(ArrayObject):
|
|||
in (x,y) form.
|
||||
"""
|
||||
|
||||
|
||||
class Field(TreeObject):
|
||||
"""
|
||||
A class representing a field dictionary. This class is accessed through
|
||||
|
@ -1009,6 +1016,7 @@ class Field(TreeObject):
|
|||
See Section 8.5.2 of the PDF 1.7 reference.
|
||||
"""
|
||||
|
||||
|
||||
class Destination(TreeObject):
|
||||
"""
|
||||
A class representing a destination within a PDF file.
|
||||
|
@ -1157,6 +1165,7 @@ def encode_pdfdocencoding(unicode_string):
|
|||
"does not exist in translation table")
|
||||
return retval
|
||||
|
||||
|
||||
def decode_pdfdocencoding(byte_array):
|
||||
retval = u_('')
|
||||
for b in byte_array:
|
||||
|
@ -1211,4 +1220,3 @@ for i in range(256):
|
|||
continue
|
||||
assert char not in _pdfDocEncoding_rev
|
||||
_pdfDocEncoding_rev[char] = i
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from .generic import *
|
||||
from .utils import string_type
|
||||
from .utils import isString, str_
|
||||
from .pdf import PdfFileReader, PdfFileWriter
|
||||
from .pagerange import PageRange
|
||||
from sys import version_info
|
||||
|
@ -40,6 +40,7 @@ else:
|
|||
from io import FileIO as file
|
||||
StreamIO = BytesIO
|
||||
|
||||
|
||||
class _MergedPage(object):
|
||||
"""
|
||||
_MergedPage is used internally by PdfFileMerger to collect necessary
|
||||
|
@ -51,6 +52,7 @@ class _MergedPage(object):
|
|||
self.out_pagedata = None
|
||||
self.id = id
|
||||
|
||||
|
||||
class PdfFileMerger(object):
|
||||
"""
|
||||
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
|
||||
|
@ -107,7 +109,8 @@ class PdfFileMerger(object):
|
|||
# it is a PdfFileReader, copy that reader's stream into a
|
||||
# BytesIO (or StreamIO) stream.
|
||||
# If fileobj is none of the above types, it is not modified
|
||||
if type(fileobj) == string_type:
|
||||
decryption_key = None
|
||||
if isString(fileobj):
|
||||
fileobj = file(fileobj, 'rb')
|
||||
my_file = True
|
||||
elif isinstance(fileobj, file):
|
||||
|
@ -121,11 +124,15 @@ class PdfFileMerger(object):
|
|||
filecontent = StreamIO(fileobj.stream.read())
|
||||
fileobj.stream.seek(orig_tell) # reset the stream to its original location
|
||||
fileobj = filecontent
|
||||
if hasattr(fileobj, '_decryption_key'):
|
||||
decryption_key = fileobj._decryption_key
|
||||
my_file = True
|
||||
|
||||
# Create a new PdfFileReader instance using the stream
|
||||
# (either file or BytesIO or StringIO) created above
|
||||
pdfr = PdfFileReader(fileobj, strict=self.strict)
|
||||
if decryption_key is not None:
|
||||
pdfr._decryption_key = decryption_key
|
||||
|
||||
# Find the range of pages to merge.
|
||||
if pages == None:
|
||||
|
@ -167,14 +174,12 @@ class PdfFileMerger(object):
|
|||
self._associate_dests_to_pages(srcpages)
|
||||
self._associate_bookmarks_to_pages(srcpages)
|
||||
|
||||
|
||||
# Slice to insert the pages at the specified position
|
||||
self.pages[position:position] = srcpages
|
||||
|
||||
# Keep track of our input files so we can close them later
|
||||
self.inputs.append((fileobj, pdfr, my_file))
|
||||
|
||||
|
||||
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||
"""
|
||||
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
|
||||
|
@ -197,7 +202,6 @@ class PdfFileMerger(object):
|
|||
|
||||
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
|
||||
|
||||
|
||||
def write(self, fileobj):
|
||||
"""
|
||||
Writes all data that has been merged to the given output file.
|
||||
|
@ -206,11 +210,10 @@ class PdfFileMerger(object):
|
|||
file-like object.
|
||||
"""
|
||||
my_file = False
|
||||
if type(fileobj) in (str, str):
|
||||
if isString(fileobj):
|
||||
fileobj = file(fileobj, 'wb')
|
||||
my_file = True
|
||||
|
||||
|
||||
# Add pages to the PdfFileWriter
|
||||
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
|
||||
for page in self.pages:
|
||||
|
@ -229,8 +232,6 @@ class PdfFileMerger(object):
|
|||
if my_file:
|
||||
fileobj.close()
|
||||
|
||||
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Shuts all file descriptors (input and output) and clears all memory
|
||||
|
@ -298,7 +299,7 @@ class PdfFileMerger(object):
|
|||
for j in range(*pages):
|
||||
if pdf.getPage(j).getObject() == o['/Page'].getObject():
|
||||
o[NameObject('/Page')] = o['/Page'].getObject()
|
||||
assert str(k) == str(o['/Title'])
|
||||
assert str_(k) == str_(o['/Title'])
|
||||
new_dests.append(o)
|
||||
break
|
||||
return new_dests
|
||||
|
@ -348,7 +349,6 @@ class PdfFileMerger(object):
|
|||
if bookmarks == None:
|
||||
bookmarks = self.bookmarks
|
||||
|
||||
|
||||
last_added = None
|
||||
for b in bookmarks:
|
||||
if isinstance(b, list):
|
||||
|
@ -504,7 +504,6 @@ class PdfFileMerger(object):
|
|||
bmparent.insert(npos, [dest])
|
||||
return dest
|
||||
|
||||
|
||||
def addNamedDestination(self, title, pagenum):
|
||||
"""
|
||||
Add a destination to the output.
|
||||
|
|
|
@ -8,7 +8,7 @@ see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
|
|||
"""
|
||||
|
||||
import re
|
||||
from .utils import Str
|
||||
from .utils import isString
|
||||
|
||||
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
|
||||
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
|
||||
|
@ -68,7 +68,7 @@ class PageRange(object):
|
|||
self._slice = arg.to_slice()
|
||||
return
|
||||
|
||||
m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
|
||||
m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
|
||||
if not m:
|
||||
raise ParseError(arg)
|
||||
elif m.group(2):
|
||||
|
@ -89,7 +89,7 @@ class PageRange(object):
|
|||
""" True if input is a valid initializer for a PageRange. """
|
||||
return isinstance(input, slice) or \
|
||||
isinstance(input, PageRange) or \
|
||||
(isinstance(input, Str)
|
||||
(isString(input)
|
||||
and bool(re.match(PAGE_RANGE_RE, input)))
|
||||
|
||||
def to_slice(self):
|
||||
|
|
|
@ -63,7 +63,7 @@ import warnings
|
|||
import codecs
|
||||
from .generic import *
|
||||
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
||||
from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning
|
||||
from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning
|
||||
|
||||
if version_info < ( 2, 4 ):
|
||||
from sets import ImmutableSet as frozenset
|
||||
|
@ -74,6 +74,7 @@ else:
|
|||
from hashlib import md5
|
||||
import uuid
|
||||
|
||||
|
||||
class PdfFileWriter(object):
|
||||
"""
|
||||
This class supports writing PDF files out, given pages produced by another
|
||||
|
@ -228,6 +229,157 @@ class PdfFileWriter(object):
|
|||
NameObject("/OpenAction"): self._addObject(js)
|
||||
})
|
||||
|
||||
def addAttachment(self, fname, fdata):
    """
    Embed a file inside the PDF.

    Three objects are built: the stream holding the file's data, the
    /Filespec dictionary pointing at that stream, and the /Names entry
    that is stored in the document root.

    :param str fname: The filename to display.
    :param str fdata: The data in the file.

    Reference:
    https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Section 7.11.3
    """
    # 1. The embedded file stream. Sample of the resulting object:
    #
    #     8 0 obj
    #     <<
    #      /Length 12
    #      /Type /EmbeddedFile
    #     >>
    #     stream
    #     Hello world!
    #     endstream
    #     endobj
    embedded_stream = DecodedStreamObject()
    embedded_stream.setData(fdata)
    embedded_stream.update({
        NameObject("/Type"): NameObject("/EmbeddedFile"),
    })

    # 2. The file specification. Sample of the resulting object:
    #
    #     7 0 obj
    #     <<
    #      /Type /Filespec
    #      /F (hello.txt)
    #      /EF << /F 8 0 R >>
    #     >>
    ef_dictionary = DictionaryObject()
    ef_dictionary.update({
        NameObject("/F"): embedded_stream,
    })

    file_specification = DictionaryObject()
    file_specification.update({
        NameObject("/Type"): NameObject("/Filespec"),
        # Perhaps also try TextStringObject
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): ef_dictionary,
    })

    # 3. The root entry, which needs a reference to the Filespec. Sample:
    #
    #     1 0 obj
    #     <<
    #      /Type /Catalog
    #      /Outlines 2 0 R
    #      /Pages 3 0 R
    #      /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
    #     >>
    #     endobj
    name_tree = DictionaryObject()
    name_tree.update({
        NameObject("/Names"): ArrayObject(
            [createStringObject(fname), file_specification]
        ),
    })

    names_entry = DictionaryObject()
    names_entry.update({
        NameObject("/EmbeddedFiles"): name_tree,
    })

    # NOTE(review): this sets the root's /Names key to the new dictionary,
    # so whatever was stored under /Names before is replaced - confirm
    # whether an existing entry should be merged instead.
    self._root_object.update({
        NameObject("/Names"): names_entry,
    })
|
||||
|
||||
def appendPagesFromReader(self, reader, after_page_append=None):
    """
    Append every page of ``reader`` to this writer, in order.

    After each page has been added, the page is fetched back from the
    writer at its new position and, when a callable was supplied, handed
    to ``after_page_append``.

    :param reader: object providing ``getNumPages()`` and
        ``getPage(index)`` (a PdfFileReader) whose pages are appended.
    :callback after_page_append (function): Optional. Invoked once per
        appended page. Callback signature:

        :param writer_pageref (PDF page reference): The page as returned
            by this writer's ``getPage`` for the position it was
            appended at.
    """
    # Both counts are taken before anything is added, so that
    # ``first_new_index + offset`` addresses the page appended in the
    # current iteration.
    source_count = reader.getNumPages()
    first_new_index = self.getNumPages()
    notify = callable(after_page_append)

    for offset in range(source_count):
        self.addPage(reader.getPage(offset))
        appended_page = self.getPage(first_new_index + offset)
        if notify:
            after_page_append(appended_page)
|
||||
|
||||
def updatePageFormFieldValues(self, page, fields):
    """
    Update the form field values for a given page from a fields dictionary.

    For every annotation of ``page`` whose ``/T`` entry equals a key of
    ``fields``, the annotation's ``/V`` entry is set to the corresponding
    value, wrapped in a ``TextStringObject``.

    :param page: Page from the PDF writer whose annotations are updated.
        A page without an ``/Annots`` entry is left untouched.
    :param fields: a Python dictionary of field names (/T) and text
        values (/V)
    """
    # A page that carries no annotations has nothing to update; indexing
    # '/Annots' unconditionally would raise KeyError for such pages.
    if '/Annots' not in page:
        return

    # Look the annotation array up once instead of on every iteration.
    annotations = page['/Annots']
    for position in range(len(annotations)):
        writer_annot = annotations[position].getObject()
        for field in fields:
            # Compared with ``==`` (not a dict lookup) so that the /T
            # value itself never has to be hashable.
            if writer_annot.get('/T') == field:
                writer_annot.update({
                    NameObject("/V"): TextStringObject(fields[field])
                })
|
||||
|
||||
def cloneReaderDocumentRoot(self, reader):
    """
    Use the reader's document root as this writer's root object.

    The object found under ``'/Root'`` in ``reader.trailer`` is assigned
    to ``self._root_object``; no copy is made here.

    :param reader: PdfFileReader whose document root should be used.
    """
    self._root_object = reader.trailer['/Root']
|
||||
|
||||
def cloneDocumentFromReader(self, reader, after_page_append=None):
    """
    Create a copy (clone) of a document from a PDF file reader.

    Works in two steps: the reader's document root is taken over first
    (``cloneReaderDocumentRoot``), then its pages are appended
    (``appendPagesFromReader``).

    :param reader: PDF file reader instance from which the clone
        should be created.
    :callback after_page_append (function): Passed through unchanged to
        ``appendPagesFromReader``, which invokes it after each page is
        appended to the writer. Callback signature:

        :param writer_pageref (PDF page reference): Reference to the
            page just appended to the document.
    """
    # The root must be in place before pages are appended.
    self.cloneReaderDocumentRoot(reader)
    self.appendPagesFromReader(reader, after_page_append)
|
||||
|
||||
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
|
||||
"""
|
||||
Encrypt this PDF file with the PDF Standard encryption handler.
|
||||
|
@ -516,7 +668,6 @@ class PdfFileWriter(object):
|
|||
|
||||
return bookmarkRef
|
||||
|
||||
|
||||
def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
|
||||
"""
|
||||
Add a bookmark to this PDF file.
|
||||
|
@ -553,7 +704,6 @@ class PdfFileWriter(object):
|
|||
if parent == None:
|
||||
parent = outlineRef
|
||||
|
||||
|
||||
bookmark = TreeObject()
|
||||
|
||||
bookmark.update({
|
||||
|
@ -759,7 +909,7 @@ class PdfFileWriter(object):
|
|||
else:
|
||||
borderArr = [NumberObject(0)] * 3
|
||||
|
||||
if isinstance(rect, Str):
|
||||
if isString(rect):
|
||||
rect = NameObject(rect)
|
||||
elif isinstance(rect, RectangleObject):
|
||||
pass
|
||||
|
@ -871,6 +1021,7 @@ class PdfFileWriter(object):
|
|||
"""Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
|
||||
and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
|
||||
|
||||
|
||||
class PdfFileReader(object):
|
||||
"""
|
||||
Initializes a PdfFileReader object. This operation can take some time, as
|
||||
|
@ -904,9 +1055,10 @@ class PdfFileReader(object):
|
|||
self.flattenedPages = None
|
||||
self.resolvedObjects = {}
|
||||
self.xrefIndex = 0
|
||||
self._pageId2Num = None # map page IndirectRef number to Page Number
|
||||
if hasattr(stream, 'mode') and 'b' not in stream.mode:
|
||||
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
|
||||
if type(stream) in (string_type, str):
|
||||
if isString(stream):
|
||||
fileobj = open(stream, 'rb')
|
||||
stream = BytesIO(b_(fileobj.read()))
|
||||
fileobj.close()
|
||||
|
@ -973,6 +1125,7 @@ class PdfFileReader(object):
|
|||
if self.isEncrypted:
|
||||
try:
|
||||
self._override_encryption = True
|
||||
self.decrypt('')
|
||||
return self.trailer["/Root"]["/Pages"]["/Count"]
|
||||
except:
|
||||
raise utils.PdfReadError("File has not been decrypted")
|
||||
|
@ -1160,7 +1313,14 @@ class PdfFileReader(object):
|
|||
|
||||
# get the outline dictionary and named destinations
|
||||
if "/Outlines" in catalog:
|
||||
try:
|
||||
lines = catalog["/Outlines"]
|
||||
except utils.PdfReadError:
|
||||
# this occurs if the /Outlines object reference is incorrect
|
||||
# for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
|
||||
# so continue to load the file without the Bookmarks
|
||||
return outlines
|
||||
|
||||
if "/First" in lines:
|
||||
node = lines["/First"]
|
||||
self._namedDests = self.getNamedDestinations()
|
||||
|
@ -1187,6 +1347,49 @@ class PdfFileReader(object):
|
|||
|
||||
return outlines
|
||||
|
||||
def _getPageNumberByIndirect(self, indirectRef):
    """Map an indirect reference (or a bare object id) to a page index.

    On first use a lookup table from each page's ``indirectRef.idnum``
    to its position in ``self.pages`` is built and cached in
    ``self._pageId2Num``.

    :param indirectRef: either an ``int`` object id, or an object with
        an ``idnum`` attribute.
    :return: the zero-based page index, or -1 when the id is unknown or
        ``indirectRef`` carries no ``idnum`` at all (for example ``None``).
    :rtype: int
    """
    if self._pageId2Num is None:
        # Assigned only after the whole table was built, so a failure
        # while enumerating pages does not leave a partial cache behind.
        self._pageId2Num = dict(
            (page.indirectRef.idnum, number)
            for number, page in enumerate(self.pages)
        )

    if isinstance(indirectRef, int):
        idnum = indirectRef
    else:
        # Anything that is not a reference has no ``idnum``; report it
        # as "not found" instead of raising AttributeError.
        idnum = getattr(indirectRef, "idnum", None)

    return self._pageId2Num.get(idnum, -1)
|
||||
|
||||
def getPageNumber(self, page):
    """
    Look up the page number of a PageObject belonging to this document.

    The page's ``indirectRef`` is resolved through
    ``_getPageNumberByIndirect``.

    :param PageObject page: The page to get page number. Should be
        an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    return self._getPageNumberByIndirect(page.indirectRef)
|
||||
|
||||
def getDestinationPageNumber(self, destination):
    """
    Look up the page number that a Destination object points at.

    The destination's ``page`` attribute is resolved through
    ``_getPageNumberByIndirect``.

    :param Destination destination: The destination to get page number.
         Should be an instance of
         :class:`Destination<PyPDF2.pdf.Destination>`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    return self._getPageNumberByIndirect(destination.page)
|
||||
|
||||
def _buildDestination(self, title, array):
|
||||
page, typ = array[0:2]
|
||||
array = array[2:]
|
||||
|
@ -1210,7 +1413,7 @@ class PdfFileReader(object):
|
|||
if dest:
|
||||
if isinstance(dest, ArrayObject):
|
||||
outline = self._buildDestination(title, dest)
|
||||
elif isinstance(dest, Str) and dest in self._namedDests:
|
||||
elif isString(dest) and dest in self._namedDests:
|
||||
outline = self._namedDests[dest]
|
||||
outline[NameObject("/Title")] = title
|
||||
else:
|
||||
|
@ -1310,6 +1513,8 @@ class PdfFileReader(object):
|
|||
assert idx < objStm['/N']
|
||||
streamData = BytesIO(b_(objStm.getData()))
|
||||
for i in range(objStm['/N']):
|
||||
readNonWhitespace(streamData)
|
||||
streamData.seek(-1, 1)
|
||||
objnum = NumberObject.readFromStream(streamData)
|
||||
readNonWhitespace(streamData)
|
||||
streamData.seek(-1, 1)
|
||||
|
@ -1347,7 +1552,6 @@ class PdfFileReader(object):
|
|||
if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
|
||||
return NullObject()
|
||||
|
||||
|
||||
def getObject(self, indirectReference):
|
||||
debug = False
|
||||
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
|
||||
|
@ -1470,7 +1674,7 @@ class PdfFileReader(object):
|
|||
startxref = int(line)
|
||||
except ValueError:
|
||||
# 'startxref' may be on the same line as the location
|
||||
if not line.startswith("startxref"):
|
||||
if not line.startswith(b_("startxref")):
|
||||
raise utils.PdfReadError("startxref not found")
|
||||
startxref = int(line[9:].strip())
|
||||
warnings.warn("startxref on same line as offset")
|
||||
|
@ -1580,6 +1784,7 @@ class PdfFileReader(object):
|
|||
assert len(entrySizes) >= 3
|
||||
if self.strict and len(entrySizes) > 3:
|
||||
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
|
||||
|
||||
def getEntry(i):
|
||||
# Reads the correct number of bytes for each entry. See the
|
||||
# discussion of the W parameter in PDF spec table 17.
|
||||
|
@ -1664,8 +1869,7 @@ class PdfFileReader(object):
|
|||
if found:
|
||||
continue
|
||||
# no xref table found at specified location
|
||||
assert False
|
||||
break
|
||||
raise utils.PdfReadError("Could not find xref table at specified location")
|
||||
#if not zero-indexed, verify that the table is correct; change it if necessary
|
||||
if self.xrefIndex and not self.strict:
|
||||
loc = stream.tell()
|
||||
|
@ -1683,7 +1887,6 @@ class PdfFileReader(object):
|
|||
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
|
||||
stream.seek(loc, 0) #return to where it was
|
||||
|
||||
|
||||
def _zeroXref(self, generation):
|
||||
self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
|
||||
|
||||
|
@ -1700,8 +1903,13 @@ class PdfFileReader(object):
|
|||
if debug: print(">>readNextEndLine")
|
||||
line = b_("")
|
||||
while True:
|
||||
# Prevent infinite loops in malformed PDFs
|
||||
if stream.tell() == 0:
|
||||
raise utils.PdfReadError("Could not read malformed PDF file")
|
||||
x = stream.read(1)
|
||||
if debug: print((" x:", x, "%x"%ord(x)))
|
||||
if stream.tell() < 2:
|
||||
raise utils.PdfReadError("EOL marker not found")
|
||||
stream.seek(-2, 1)
|
||||
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
|
||||
crlf = False
|
||||
|
@ -1713,6 +1921,8 @@ class PdfFileReader(object):
|
|||
if x == b_('\n') or x == b_('\r'): # account for CR+LF
|
||||
stream.seek(-1, 1)
|
||||
crlf = True
|
||||
if stream.tell() < 2:
|
||||
raise utils.PdfReadError("EOL marker not found")
|
||||
stream.seek(-2, 1)
|
||||
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
|
||||
break
|
||||
|
@ -1827,14 +2037,17 @@ def getRectangle(self, name, defaults):
|
|||
setRectangle(self, name, retval)
|
||||
return retval
|
||||
|
||||
|
||||
def setRectangle(self, name, value):
    """
    Store ``value`` in ``self`` under ``name``.

    ``name`` is wrapped in a ``NameObject`` first unless it already is one.
    """
    key = name if isinstance(name, NameObject) else NameObject(name)
    self[key] = value
|
||||
|
||||
|
||||
def deleteRectangle(self, name):
    """Delete the entry stored under ``name`` from ``self``."""
    del self[name]
|
||||
|
||||
|
||||
def createRectangleAccessor(name, fallback):
|
||||
return \
|
||||
property(
|
||||
|
@ -1843,6 +2056,7 @@ def createRectangleAccessor(name, fallback):
|
|||
lambda self: deleteRectangle(self, name)
|
||||
)
|
||||
|
||||
|
||||
class PageObject(DictionaryObject):
|
||||
"""
|
||||
This class represents a single page within a PDF file. Typically this
|
||||
|
@ -2374,6 +2588,7 @@ class PageObject(DictionaryObject):
|
|||
for i in operands[0]:
|
||||
if isinstance(i, TextStringObject):
|
||||
text += i
|
||||
text += "\n"
|
||||
return text
|
||||
|
||||
mediaBox = createRectangleAccessor("/MediaBox", ())
|
||||
|
@ -2412,6 +2627,7 @@ class PageObject(DictionaryObject):
|
|||
page's creator.
|
||||
"""
|
||||
|
||||
|
||||
class ContentStream(DecodedStreamObject):
|
||||
def __init__(self, stream, pdf):
|
||||
self.pdf = pdf
|
||||
|
@ -2437,25 +2653,25 @@ class ContentStream(DecodedStreamObject):
|
|||
if peek == b_('') or ord_(peek) == 0:
|
||||
break
|
||||
stream.seek(-1, 1)
|
||||
if peek.isalpha() or peek == "'" or peek == '"':
|
||||
if peek.isalpha() or peek == b_("'") or peek == b_('"'):
|
||||
operator = utils.readUntilRegex(stream,
|
||||
NameObject.delimiterPattern, True)
|
||||
if operator == "BI":
|
||||
if operator == b_("BI"):
|
||||
# begin inline image - a completely different parsing
|
||||
# mechanism is required, of course... thanks buddy...
|
||||
assert operands == []
|
||||
ii = self._readInlineImage(stream)
|
||||
self.operations.append((ii, "INLINE IMAGE"))
|
||||
self.operations.append((ii, b_("INLINE IMAGE")))
|
||||
else:
|
||||
self.operations.append((operands, operator))
|
||||
operands = []
|
||||
elif peek == '%':
|
||||
elif peek == b_('%'):
|
||||
# If we encounter a comment in the content stream, we have to
|
||||
# handle it here. Typically, readObject will handle
|
||||
# encountering a comment -- but readObject assumes that
|
||||
# following the comment must be the object we're trying to
|
||||
# read. In this case, it could be an operator instead.
|
||||
while peek not in ('\r', '\n'):
|
||||
while peek not in (b_('\r'), b_('\n')):
|
||||
peek = stream.read(1)
|
||||
else:
|
||||
operands.append(readObject(stream, None))
|
||||
|
@ -2467,7 +2683,7 @@ class ContentStream(DecodedStreamObject):
|
|||
while True:
|
||||
tok = readNonWhitespace(stream)
|
||||
stream.seek(-1, 1)
|
||||
if tok == "I":
|
||||
if tok == b_("I"):
|
||||
# "ID" - begin of image data
|
||||
break
|
||||
key = readObject(stream, self.pdf)
|
||||
|
@ -2477,28 +2693,32 @@ class ContentStream(DecodedStreamObject):
|
|||
settings[key] = value
|
||||
# left at beginning of ID
|
||||
tmp = stream.read(3)
|
||||
assert tmp[:2] == "ID"
|
||||
data = ""
|
||||
assert tmp[:2] == b_("ID")
|
||||
data = b_("")
|
||||
while True:
|
||||
# Read the inline image, while checking for EI (End Image) operator.
|
||||
tok = stream.read(1)
|
||||
if tok == "E":
|
||||
if tok == b_("E"):
|
||||
# Check for End Image
|
||||
next1 = stream.read(1)
|
||||
if next1 == "I":
|
||||
next2 = readNonWhitespace(stream)
|
||||
if next2 == 'Q':
|
||||
tok2 = stream.read(1)
|
||||
if tok2 == b_("I"):
|
||||
# Sometimes that data will contain EI, so check for the Q operator.
|
||||
tok3 = stream.read(1)
|
||||
info = tok + tok2
|
||||
while tok3 in utils.WHITESPACES:
|
||||
info += tok3
|
||||
tok3 = stream.read(1)
|
||||
if tok3 == b_("Q"):
|
||||
stream.seek(-1, 1)
|
||||
break
|
||||
else:
|
||||
stream.seek(-2, 1)
|
||||
data += tok
|
||||
stream.seek(-1,1)
|
||||
data += info
|
||||
else:
|
||||
stream.seek(-1, 1)
|
||||
data += tok
|
||||
else:
|
||||
data += tok
|
||||
x = readNonWhitespace(stream)
|
||||
stream.seek(-1, 1)
|
||||
return {"settings": settings, "data": data}
|
||||
|
||||
def _getData(self):
|
||||
|
@ -2525,6 +2745,7 @@ class ContentStream(DecodedStreamObject):
|
|||
|
||||
_data = property(_getData, _setData)
|
||||
|
||||
|
||||
class DocumentInformation(DictionaryObject):
|
||||
"""
|
||||
A class representing the basic document metadata provided in a PDF File.
|
||||
|
@ -2588,6 +2809,7 @@ class DocumentInformation(DictionaryObject):
|
|||
producer_raw = property(lambda self: self.get("/Producer"))
|
||||
"""The "raw" version of producer; can return a ``ByteStringObject``."""
|
||||
|
||||
|
||||
def convertToInt(d, size):
|
||||
if size > 8:
|
||||
raise utils.PdfReadError("invalid size in convertToInt")
|
||||
|
@ -2600,6 +2822,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
|
|||
b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
|
||||
b_('\xa9\xfe\x64\x53\x69\x7a')
|
||||
|
||||
|
||||
# Implementation of algorithm 3.2 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
|
||||
|
@ -2643,6 +2866,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
|||
# entry.
|
||||
return md5_hash[:keylen]
|
||||
|
||||
|
||||
# Implementation of algorithm 3.3 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg33(owner_pwd, user_pwd, rev, keylen):
|
||||
|
@ -2670,6 +2894,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen):
|
|||
# the /O entry in the encryption dictionary.
|
||||
return val
|
||||
|
||||
|
||||
# Steps 1-4 of algorithm 3.3
|
||||
def _alg33_1(password, rev, keylen):
|
||||
# 1. Pad or truncate the owner password string as described in step 1 of
|
||||
|
@ -2692,6 +2917,7 @@ def _alg33_1(password, rev, keylen):
|
|||
key = md5_hash[:keylen]
|
||||
return key
|
||||
|
||||
|
||||
# Implementation of algorithm 3.4 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg34(password, owner_entry, p_entry, id1_entry):
|
||||
|
@ -2706,6 +2932,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry):
|
|||
# encryption dictionary.
|
||||
return U, key
|
||||
|
||||
|
||||
# Implementation of algorithm 3.5 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
|
||||
|
|
|
@ -33,25 +33,35 @@ __author_email__ = "biziqe@mathieu.fenniak.net"
|
|||
|
||||
|
||||
import sys
|
||||
# "Str" maintains compatibility with Python 2.x.
|
||||
# The next line is obfuscated like this so 2to3 won't change it.
|
||||
|
||||
try:
|
||||
import __builtin__ as builtins
|
||||
except ImportError: # Py3
|
||||
import builtins
|
||||
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
string_type = unicode
|
||||
bytes_type = str
|
||||
int_types = (int, long)
|
||||
else:
|
||||
string_type = str
|
||||
bytes_type = bytes
|
||||
int_types = (int,)
|
||||
xrange_fn = getattr(builtins, "xrange", range)
|
||||
_basestring = getattr(builtins, "basestring", str)
|
||||
|
||||
Xrange = getattr(builtins, "xrange", range)
|
||||
Str = getattr(builtins, "basestring", str)
|
||||
bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
|
||||
string_type = getattr(builtins, "unicode", str)
|
||||
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
|
||||
|
||||
|
||||
# Make basic type tests more consistent
|
||||
def isString(s):
    """
    Report whether ``s`` is a string. Compatible with Python 2 and 3.

    The check is made against the module-level ``_basestring`` type.
    """
    return isinstance(s, _basestring)
|
||||
|
||||
|
||||
def isInt(n):
    """
    Report whether ``n`` is an int. Compatible with Python 2 and 3.

    The check is made against the module-level ``int_types`` tuple.
    """
    return isinstance(n, int_types)
|
||||
|
||||
|
||||
def isBytes(b):
    """
    Report whether ``b`` is a bytes instance. Compatible with Python 2 and 3.

    The check is made against the module-level ``bytes_type``.
    """
    return isinstance(b, bytes_type)
|
||||
|
||||
|
||||
#custom implementation of warnings.formatwarning
|
||||
|
@ -59,6 +69,7 @@ def formatWarning(message, category, filename, lineno, line=None):
|
|||
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
|
||||
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|
||||
|
||||
|
||||
def readUntilWhitespace(stream, maxchars=None):
|
||||
"""
|
||||
Reads non-whitespace characters and returns them.
|
||||
|
@ -74,6 +85,7 @@ def readUntilWhitespace(stream, maxchars=None):
|
|||
break
|
||||
return txt
|
||||
|
||||
|
||||
def readNonWhitespace(stream):
|
||||
"""
|
||||
Finds and reads the next non-whitespace character (ignores whitespace).
|
||||
|
@ -83,6 +95,7 @@ def readNonWhitespace(stream):
|
|||
tok = stream.read(1)
|
||||
return tok
|
||||
|
||||
|
||||
def skipOverWhitespace(stream):
|
||||
"""
|
||||
Similar to readNonWhitespace, but returns a Boolean if more than
|
||||
|
@ -95,6 +108,7 @@ def skipOverWhitespace(stream):
|
|||
cnt+=1
|
||||
return (cnt > 1)
|
||||
|
||||
|
||||
def skipOverComment(stream):
|
||||
tok = stream.read(1)
|
||||
stream.seek(-1, 1)
|
||||
|
@ -102,6 +116,7 @@ def skipOverComment(stream):
|
|||
while tok not in (b_('\n'), b_('\r')):
|
||||
tok = stream.read(1)
|
||||
|
||||
|
||||
def readUntilRegex(stream, regex, ignore_eof=False):
|
||||
"""
|
||||
Reads until the regular expression pattern matched (ignore the match)
|
||||
|
@ -125,6 +140,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
|
|||
name += tok
|
||||
return name
|
||||
|
||||
|
||||
class ConvertFunctionsToVirtualList(object):
|
||||
def __init__(self, lengthFunction, getFunction):
|
||||
self.lengthFunction = lengthFunction
|
||||
|
@ -135,10 +151,10 @@ class ConvertFunctionsToVirtualList(object):
|
|||
|
||||
def __getitem__(self, index):
|
||||
if isinstance(index, slice):
|
||||
indices = Xrange(*index.indices(len(self)))
|
||||
indices = xrange_fn(*index.indices(len(self)))
|
||||
cls = type(self)
|
||||
return cls(indices.__len__, lambda idx: self[indices[idx]])
|
||||
if not isinstance(index, int_types):
|
||||
if not isInt(index):
|
||||
raise TypeError("sequence indices must be integers")
|
||||
len_self = len(self)
|
||||
if index < 0:
|
||||
|
@ -148,6 +164,7 @@ class ConvertFunctionsToVirtualList(object):
|
|||
raise IndexError("sequence index out of range")
|
||||
return self.getFunction(index)
|
||||
|
||||
|
||||
def RC4_encrypt(key, plaintext):
|
||||
S = [i for i in range(256)]
|
||||
j = 0
|
||||
|
@ -164,12 +181,14 @@ def RC4_encrypt(key, plaintext):
|
|||
retval += b_(chr(ord_(plaintext[x]) ^ t))
|
||||
return retval
|
||||
|
||||
|
||||
def matrixMultiply(a, b):
    """
    Multiply matrix ``a`` by matrix ``b``, both given as sequences of rows.

    Every entry is passed through ``float()`` before multiplying. The
    result is a list of rows, one per row of ``a``, each holding one
    value per column of ``b``.
    """
    product = []
    for row in a:
        product_row = []
        # zip(*b) walks b column by column.
        for col in zip(*b):
            terms = [float(x) * float(y) for x, y in zip(row, col)]
            product_row.append(sum(terms))
        product.append(product_row)
    return product
|
||||
|
||||
|
||||
def markLocation(stream):
|
||||
"""Creates text file showing current location in context."""
|
||||
# Mainly for debugging
|
||||
|
@ -182,18 +201,23 @@ def markLocation(stream):
|
|||
outputDoc.close()
|
||||
stream.seek(-RADIUS, 1)
|
||||
|
||||
|
||||
class PyPdfError(Exception):
    """Root of the exception classes defined in this module."""


class PdfReadError(PyPdfError):
    """A :class:`PyPdfError` for failures while reading a PDF."""


class PageSizeNotDefinedError(PyPdfError):
    """A :class:`PyPdfError` for a page whose size is not defined."""


class PdfReadWarning(UserWarning):
    """Warning category (a ``UserWarning``) for PDF reading problems."""


class PdfStreamError(PdfReadError):
    """A :class:`PdfReadError` specific to streams."""
|
||||
|
||||
|
@ -203,6 +227,7 @@ if sys.version_info[0] < 3:
|
|||
return s
|
||||
else:
|
||||
B_CACHE = {}
|
||||
|
||||
def b_(s):
|
||||
bc = B_CACHE
|
||||
if s in bc:
|
||||
|
@ -214,6 +239,8 @@ else:
|
|||
if len(s) < 2:
|
||||
bc[s] = r
|
||||
return r
|
||||
|
||||
|
||||
def u_(s):
|
||||
if sys.version_info[0] < 3:
|
||||
return unicode(s, 'unicode_escape')
|
||||
|
@ -230,24 +257,28 @@ def str_(b):
|
|||
else:
|
||||
return b
|
||||
|
||||
|
||||
def ord_(b):
    """
    Return ``ord(b)`` where that applies, otherwise ``b`` unchanged.

    ``ord`` is used on Python 2 and, on Python 3, only when ``b`` is
    exactly a ``str``. Any other value on Python 3 (e.g. an int obtained
    by indexing ``bytes``) is handed back as is.
    """
    if sys.version_info[0] >= 3 and type(b) != str:
        return b
    return ord(b)
|
||||
|
||||
|
||||
def chr_(c):
    """Return ``c`` unchanged on Python 2, and ``chr(c)`` on Python 3."""
    return c if sys.version_info[0] < 3 else chr(c)
|
||||
|
||||
|
||||
def barray(b):
    """Return ``b`` unchanged on Python 2, and ``bytearray(b)`` on Python 3."""
    return b if sys.version_info[0] < 3 else bytearray(b)
|
||||
|
||||
|
||||
def hexencode(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b.encode('hex')
|
||||
|
@ -256,6 +287,7 @@ def hexencode(b):
|
|||
coder = codecs.getencoder('hex_codec')
|
||||
return coder(b)[0]
|
||||
|
||||
|
||||
def hexStr(num):
    """Return ``hex(num)`` with every ``L`` character removed."""
    text = hex(num)
    return text.replace('L', '')
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ iso8601 = re.compile("""
|
|||
)?
|
||||
""", re.VERBOSE)
|
||||
|
||||
|
||||
class XmpInformation(PdfObject):
|
||||
"""
|
||||
An object that represents Adobe XMP metadata.
|
||||
|
@ -355,5 +356,3 @@ class XmpInformation(PdfObject):
|
|||
:return: a dictionary of key/value items for custom metadata properties.
|
||||
:rtype: dict
|
||||
"""
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue