update pypdf2
This commit is contained in:
parent
b8b1fe89bd
commit
66205d529e
19 changed files with 626 additions and 315 deletions
|
@ -1,32 +0,0 @@
|
||||||
Metadata-Version: 1.1
|
|
||||||
Name: PyPDF2
|
|
||||||
Version: 1.23
|
|
||||||
Summary: PDF toolkit
|
|
||||||
Home-page: http://mstamy2.github.com/PyPDF2
|
|
||||||
Author: Phaseit, Inc.
|
|
||||||
Author-email: PyPDF2@phaseit.net
|
|
||||||
License: UNKNOWN
|
|
||||||
Description:
|
|
||||||
A Pure-Python library built as a PDF toolkit. It is capable of:
|
|
||||||
|
|
||||||
- extracting document information (title, author, ...)
|
|
||||||
- splitting documents page by page
|
|
||||||
- merging documents page by page
|
|
||||||
- cropping pages
|
|
||||||
- merging multiple pages into a single page
|
|
||||||
- encrypting and decrypting PDF files
|
|
||||||
- and more!
|
|
||||||
|
|
||||||
By being Pure-Python, it should run on any Python platform without any
|
|
||||||
dependencies on external libraries. It can also work entirely on StringIO
|
|
||||||
objects rather than file streams, allowing for PDF manipulation in memory.
|
|
||||||
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
|
||||||
|
|
||||||
Platform: UNKNOWN
|
|
||||||
Classifier: Development Status :: 5 - Production/Stable
|
|
||||||
Classifier: Intended Audience :: Developers
|
|
||||||
Classifier: License :: OSI Approved :: BSD License
|
|
||||||
Classifier: Programming Language :: Python :: 2
|
|
||||||
Classifier: Programming Language :: Python :: 3
|
|
||||||
Classifier: Operating System :: OS Independent
|
|
||||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
@ -1,15 +0,0 @@
|
||||||
CHANGELOG
|
|
||||||
MANIFEST.in
|
|
||||||
PyPDF2/__init__.py
|
|
||||||
PyPDF2/_version.py
|
|
||||||
PyPDF2/filters.py
|
|
||||||
PyPDF2/generic.py
|
|
||||||
PyPDF2/merger.py
|
|
||||||
PyPDF2/pagerange.py
|
|
||||||
PyPDF2/pdf.py
|
|
||||||
PyPDF2/utils.py
|
|
||||||
PyPDF2/xmp.py
|
|
||||||
PyPDF2.egg-info/PKG-INFO
|
|
||||||
PyPDF2.egg-info/SOURCES.txt
|
|
||||||
PyPDF2.egg-info/dependency_links.txt
|
|
||||||
PyPDF2.egg-info/top_level.txt
|
|
|
@ -1,23 +0,0 @@
|
||||||
../PyPDF2/filters.py
|
|
||||||
../PyPDF2/generic.py
|
|
||||||
../PyPDF2/merger.py
|
|
||||||
../PyPDF2/pagerange.py
|
|
||||||
../PyPDF2/pdf.py
|
|
||||||
../PyPDF2/utils.py
|
|
||||||
../PyPDF2/xmp.py
|
|
||||||
../PyPDF2/_version.py
|
|
||||||
../PyPDF2/__init__.py
|
|
||||||
../PyPDF2/__pycache__/filters.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/generic.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/merger.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/pagerange.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/pdf.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/utils.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/xmp.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/_version.cpython-34.pyc
|
|
||||||
../PyPDF2/__pycache__/__init__.cpython-34.pyc
|
|
||||||
./
|
|
||||||
top_level.txt
|
|
||||||
dependency_links.txt
|
|
||||||
PKG-INFO
|
|
||||||
SOURCES.txt
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||||
|
|
||||||
|
- extracting document information (title, author, ...)
|
||||||
|
- splitting documents page by page
|
||||||
|
- merging documents page by page
|
||||||
|
- cropping pages
|
||||||
|
- merging multiple pages into a single page
|
||||||
|
- encrypting and decrypting PDF files
|
||||||
|
- and more!
|
||||||
|
|
||||||
|
By being Pure-Python, it should run on any Python platform without any
|
||||||
|
dependencies on external libraries. It can also work entirely on StringIO
|
||||||
|
objects rather than file streams, allowing for PDF manipulation in memory.
|
||||||
|
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
pip
|
|
@ -0,0 +1,34 @@
|
||||||
|
Metadata-Version: 2.0
|
||||||
|
Name: PyPDF2
|
||||||
|
Version: 1.25.1
|
||||||
|
Summary: PDF toolkit
|
||||||
|
Home-page: http://mstamy2.github.com/PyPDF2
|
||||||
|
Author: Phaseit, Inc.
|
||||||
|
Author-email: PyPDF2@phaseit.net
|
||||||
|
License: UNKNOWN
|
||||||
|
Platform: UNKNOWN
|
||||||
|
Classifier: Development Status :: 5 - Production/Stable
|
||||||
|
Classifier: Intended Audience :: Developers
|
||||||
|
Classifier: License :: OSI Approved :: BSD License
|
||||||
|
Classifier: Programming Language :: Python :: 2
|
||||||
|
Classifier: Programming Language :: Python :: 3
|
||||||
|
Classifier: Operating System :: OS Independent
|
||||||
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||||
|
|
||||||
|
|
||||||
|
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||||
|
|
||||||
|
- extracting document information (title, author, ...)
|
||||||
|
- splitting documents page by page
|
||||||
|
- merging documents page by page
|
||||||
|
- cropping pages
|
||||||
|
- merging multiple pages into a single page
|
||||||
|
- encrypting and decrypting PDF files
|
||||||
|
- and more!
|
||||||
|
|
||||||
|
By being Pure-Python, it should run on any Python platform without any
|
||||||
|
dependencies on external libraries. It can also work entirely on StringIO
|
||||||
|
objects rather than file streams, allowing for PDF manipulation in memory.
|
||||||
|
It is therefore a useful tool for websites that manage or manipulate PDFs.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
PyPDF2/__init__.py,sha256=ugkP-3fEFZZ2-54PmYpjJ5CISEPD5W8TikZlloOJZ5M,210
|
||||||
|
PyPDF2/_version.py,sha256=ufPT1c1QzU2MdIAGUZ89UoQfl6t3IJdOjhMyLVhsDmQ,23
|
||||||
|
PyPDF2/filters.py,sha256=U4KQ7fJX129ePxoff-6-009e9kCWlj8_d2ipnm5QDG4,13167
|
||||||
|
PyPDF2/generic.py,sha256=bJ3e3PpqJCvTHrQ3IH3VEXMh1RWVqiCh9T1IcmkBuAo,45129
|
||||||
|
PyPDF2/merger.py,sha256=2Cz4QaB8R-Zm3V5P2rI-QYdqMZlN4geaAtNfrPbcTM4,21387
|
||||||
|
PyPDF2/pagerange.py,sha256=AEMerbVjzXE55sJ2EYZzBgH1Xt4NiUsHaiycoNaW8Ys,5534
|
||||||
|
PyPDF2/pdf.py,sha256=ceuZWSZIupSbzEzw6QrbNmN9D8PrdM6dh8zHSB9Rg2o,124907
|
||||||
|
PyPDF2/utils.py,sha256=-ZQky5qa4gsO0zprA8V_E5sTNRBSa_ungvxvxjdHr64,7833
|
||||||
|
PyPDF2/xmp.py,sha256=vdjDUAMCqb7-AhkuNaqCanviPHMpuJ-5adY8Kxe5jUc,13639
|
||||||
|
PyPDF2-1.25.1.dist-info/DESCRIPTION.rst,sha256=mCiWyCHYtsbQ22O_f2FbbD8CjW1GMfwvbn67J_THZ5M,600
|
||||||
|
PyPDF2-1.25.1.dist-info/METADATA,sha256=lGFpbQOrG5_oOYPi4GlzoQT4Lyj3eCvNEHIomSf4JsU,1174
|
||||||
|
PyPDF2-1.25.1.dist-info/RECORD,,
|
||||||
|
PyPDF2-1.25.1.dist-info/WHEEL,sha256=bfpjj1zBtYtglW1hWtnRCmhEcEV3TH8magB_ZQeGgSg,93
|
||||||
|
PyPDF2-1.25.1.dist-info/metadata.json,sha256=aVLfNzdnpxj8hyl12sDq-3IgfGH7t0g5gS2y6LPYtYE,692
|
||||||
|
PyPDF2-1.25.1.dist-info/top_level.txt,sha256=BERWrwqdvKXaVKhpnMbtO6b11qPA-mBt2r9a0VPF-Ow,7
|
||||||
|
/srv/openmedialibrary/platform/Shared/home/.local/lib/python3.5/site-packages/PyPDF2-1.25.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||||
|
PyPDF2/__pycache__/xmp.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/utils.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/pdf.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/merger.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/__init__.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/generic.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/filters.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/pagerange.cpython-35.pyc,,
|
||||||
|
PyPDF2/__pycache__/_version.cpython-35.pyc,,
|
|
@ -0,0 +1,5 @@
|
||||||
|
Wheel-Version: 1.0
|
||||||
|
Generator: bdist_wheel (0.26.0)
|
||||||
|
Root-Is-Purelib: true
|
||||||
|
Tag: cp35-none-any
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", "Operating System :: OS Independent", "Topic :: Software Development :: Libraries :: Python Modules"], "extensions": {"python.details": {"contacts": [{"email": "PyPDF2@phaseit.net", "name": "Phaseit, Inc.", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://mstamy2.github.com/PyPDF2"}}}, "generator": "bdist_wheel (0.26.0)", "metadata_version": "2.0", "name": "PyPDF2", "summary": "PDF toolkit", "version": "1.25.1"}
|
|
@ -1,2 +1 @@
|
||||||
__version__ = '1.23'
|
__version__ = '1.25.1'
|
||||||
|
|
||||||
|
|
|
@ -40,28 +40,35 @@ if version_info < ( 3, 0 ):
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
else:
|
else:
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
import struct
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
def decompress(data):
|
def decompress(data):
|
||||||
return zlib.decompress(data)
|
return zlib.decompress(data)
|
||||||
|
|
||||||
def compress(data):
|
def compress(data):
|
||||||
return zlib.compress(data)
|
return zlib.compress(data)
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||||
# library from the .NET framework. (IronPython only)
|
# library from the .NET framework. (IronPython only)
|
||||||
import System
|
import System
|
||||||
from System import IO, Collections, Array
|
from System import IO, Collections, Array
|
||||||
|
|
||||||
def _string_to_bytearr(buf):
|
def _string_to_bytearr(buf):
|
||||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
retval = Array.CreateInstance(System.Byte, len(buf))
|
||||||
for i in range(len(buf)):
|
for i in range(len(buf)):
|
||||||
retval[i] = ord(buf[i])
|
retval[i] = ord(buf[i])
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def _bytearr_to_string(bytes):
|
def _bytearr_to_string(bytes):
|
||||||
retval = ""
|
retval = ""
|
||||||
for i in range(bytes.Length):
|
for i in range(bytes.Length):
|
||||||
retval += chr(bytes[i])
|
retval += chr(bytes[i])
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def _read_bytes(stream):
|
def _read_bytes(stream):
|
||||||
ms = IO.MemoryStream()
|
ms = IO.MemoryStream()
|
||||||
buf = Array.CreateInstance(System.Byte, 2048)
|
buf = Array.CreateInstance(System.Byte, 2048)
|
||||||
|
@ -74,6 +81,7 @@ except ImportError:
|
||||||
retval = ms.ToArray()
|
retval = ms.ToArray()
|
||||||
ms.Close()
|
ms.Close()
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def decompress(data):
|
def decompress(data):
|
||||||
bytes = _string_to_bytearr(data)
|
bytes = _string_to_bytearr(data)
|
||||||
ms = IO.MemoryStream()
|
ms = IO.MemoryStream()
|
||||||
|
@ -84,6 +92,7 @@ except ImportError:
|
||||||
retval = _bytearr_to_string(bytes)
|
retval = _bytearr_to_string(bytes)
|
||||||
gz.Close()
|
gz.Close()
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def compress(data):
|
def compress(data):
|
||||||
bytes = _string_to_bytearr(data)
|
bytes = _string_to_bytearr(data)
|
||||||
ms = IO.MemoryStream()
|
ms = IO.MemoryStream()
|
||||||
|
@ -106,7 +115,7 @@ class FlateDecode(object):
|
||||||
predictor = decodeParms.get("/Predictor", 1)
|
predictor = decodeParms.get("/Predictor", 1)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass # usually an array with a null object was read
|
pass # usually an array with a null object was read
|
||||||
|
|
||||||
# predictor 1 == no predictor
|
# predictor 1 == no predictor
|
||||||
if predictor != 1:
|
if predictor != 1:
|
||||||
columns = decodeParms["/Columns"]
|
columns = decodeParms["/Columns"]
|
||||||
|
@ -144,6 +153,7 @@ class FlateDecode(object):
|
||||||
return compress(data)
|
return compress(data)
|
||||||
encode = staticmethod(encode)
|
encode = staticmethod(encode)
|
||||||
|
|
||||||
|
|
||||||
class ASCIIHexDecode(object):
|
class ASCIIHexDecode(object):
|
||||||
def decode(data, decodeParms=None):
|
def decode(data, decodeParms=None):
|
||||||
retval = ""
|
retval = ""
|
||||||
|
@ -165,6 +175,7 @@ class ASCIIHexDecode(object):
|
||||||
return retval
|
return retval
|
||||||
decode = staticmethod(decode)
|
decode = staticmethod(decode)
|
||||||
|
|
||||||
|
|
||||||
class LZWDecode(object):
|
class LZWDecode(object):
|
||||||
"""Taken from:
|
"""Taken from:
|
||||||
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
|
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
|
||||||
|
@ -184,7 +195,6 @@ class LZWDecode(object):
|
||||||
def resetDict(self):
|
def resetDict(self):
|
||||||
self.dictlen=258
|
self.dictlen=258
|
||||||
self.bitspercode=9
|
self.bitspercode=9
|
||||||
|
|
||||||
|
|
||||||
def nextCode(self):
|
def nextCode(self):
|
||||||
fillbits=self.bitspercode
|
fillbits=self.bitspercode
|
||||||
|
@ -196,8 +206,8 @@ class LZWDecode(object):
|
||||||
bitsfromhere=8-self.bitpos
|
bitsfromhere=8-self.bitpos
|
||||||
if bitsfromhere>fillbits:
|
if bitsfromhere>fillbits:
|
||||||
bitsfromhere=fillbits
|
bitsfromhere=fillbits
|
||||||
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
|
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
|
||||||
(0xff >> (8-bitsfromhere))) <<
|
(0xff >> (8-bitsfromhere))) <<
|
||||||
(fillbits-bitsfromhere))
|
(fillbits-bitsfromhere))
|
||||||
fillbits -= bitsfromhere
|
fillbits -= bitsfromhere
|
||||||
self.bitpos += bitsfromhere
|
self.bitpos += bitsfromhere
|
||||||
|
@ -235,70 +245,93 @@ class LZWDecode(object):
|
||||||
baos+=p
|
baos+=p
|
||||||
self.dict[self.dictlen] = p;
|
self.dict[self.dictlen] = p;
|
||||||
self.dictlen+=1
|
self.dictlen+=1
|
||||||
if (self.dictlen >= (1 << self.bitspercode) - 1 and
|
if (self.dictlen >= (1 << self.bitspercode) - 1 and
|
||||||
self.bitspercode < 12):
|
self.bitspercode < 12):
|
||||||
self.bitspercode+=1
|
self.bitspercode+=1
|
||||||
return baos
|
return baos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def decode(data,decodeParams=None):
|
def decode(data,decodeParams=None):
|
||||||
return LZWDecode.decoder(data).decode()
|
return LZWDecode.decoder(data).decode()
|
||||||
|
|
||||||
|
|
||||||
class ASCII85Decode(object):
|
class ASCII85Decode(object):
|
||||||
def decode(data, decodeParms=None):
|
def decode(data, decodeParms=None):
|
||||||
retval = ""
|
if version_info < ( 3, 0 ):
|
||||||
group = []
|
retval = ""
|
||||||
x = 0
|
group = []
|
||||||
hitEod = False
|
x = 0
|
||||||
# remove all whitespace from data
|
hitEod = False
|
||||||
data = [y for y in data if not (y in ' \n\r\t')]
|
# remove all whitespace from data
|
||||||
while not hitEod:
|
data = [y for y in data if not (y in ' \n\r\t')]
|
||||||
c = data[x]
|
while not hitEod:
|
||||||
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
c = data[x]
|
||||||
x += 2
|
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
||||||
continue
|
x += 2
|
||||||
#elif c.isspace():
|
continue
|
||||||
# x += 1
|
#elif c.isspace():
|
||||||
# continue
|
# x += 1
|
||||||
elif c == 'z':
|
# continue
|
||||||
assert len(group) == 0
|
elif c == 'z':
|
||||||
retval += '\x00\x00\x00\x00'
|
assert len(group) == 0
|
||||||
x += 1
|
retval += '\x00\x00\x00\x00'
|
||||||
continue
|
x += 1
|
||||||
elif c == "~" and data[x+1] == ">":
|
continue
|
||||||
if len(group) != 0:
|
elif c == "~" and data[x+1] == ">":
|
||||||
# cannot have a final group of just 1 char
|
if len(group) != 0:
|
||||||
assert len(group) > 1
|
# cannot have a final group of just 1 char
|
||||||
cnt = len(group) - 1
|
assert len(group) > 1
|
||||||
group += [ 85, 85, 85 ]
|
cnt = len(group) - 1
|
||||||
hitEod = cnt
|
group += [ 85, 85, 85 ]
|
||||||
|
hitEod = cnt
|
||||||
|
else:
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
|
c = ord(c) - 33
|
||||||
|
assert c >= 0 and c < 85
|
||||||
|
group += [ c ]
|
||||||
|
if len(group) >= 5:
|
||||||
|
b = group[0] * (85**4) + \
|
||||||
|
group[1] * (85**3) + \
|
||||||
|
group[2] * (85**2) + \
|
||||||
|
group[3] * 85 + \
|
||||||
|
group[4]
|
||||||
|
assert b < (2**32 - 1)
|
||||||
|
c4 = chr((b >> 0) % 256)
|
||||||
|
c3 = chr((b >> 8) % 256)
|
||||||
|
c2 = chr((b >> 16) % 256)
|
||||||
|
c1 = chr(b >> 24)
|
||||||
|
retval += (c1 + c2 + c3 + c4)
|
||||||
|
if hitEod:
|
||||||
|
retval = retval[:-4+hitEod]
|
||||||
|
group = []
|
||||||
|
x += 1
|
||||||
|
return retval
|
||||||
|
else:
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = data.encode('ascii')
|
||||||
|
n = b = 0
|
||||||
|
out = bytearray()
|
||||||
|
for c in data:
|
||||||
|
if ord('!') <= c and c <= ord('u'):
|
||||||
|
n += 1
|
||||||
|
b = b*85+(c-33)
|
||||||
|
if n == 5:
|
||||||
|
out += struct.pack(b'>L',b)
|
||||||
|
n = b = 0
|
||||||
|
elif c == ord('z'):
|
||||||
|
assert n == 0
|
||||||
|
out += b'\0\0\0\0'
|
||||||
|
elif c == ord('~'):
|
||||||
|
if n:
|
||||||
|
for _ in range(5-n):
|
||||||
|
b = b*85+84
|
||||||
|
out += struct.pack(b'>L',b)[:n-1]
|
||||||
break
|
break
|
||||||
else:
|
return bytes(out)
|
||||||
c = ord(c) - 33
|
|
||||||
assert c >= 0 and c < 85
|
|
||||||
group += [ c ]
|
|
||||||
if len(group) >= 5:
|
|
||||||
b = group[0] * (85**4) + \
|
|
||||||
group[1] * (85**3) + \
|
|
||||||
group[2] * (85**2) + \
|
|
||||||
group[3] * 85 + \
|
|
||||||
group[4]
|
|
||||||
assert b < (2**32 - 1)
|
|
||||||
c4 = chr((b >> 0) % 256)
|
|
||||||
c3 = chr((b >> 8) % 256)
|
|
||||||
c2 = chr((b >> 16) % 256)
|
|
||||||
c1 = chr(b >> 24)
|
|
||||||
retval += (c1 + c2 + c3 + c4)
|
|
||||||
if hitEod:
|
|
||||||
retval = retval[:-4+hitEod]
|
|
||||||
group = []
|
|
||||||
x += 1
|
|
||||||
return retval
|
|
||||||
decode = staticmethod(decode)
|
decode = staticmethod(decode)
|
||||||
|
|
||||||
|
|
||||||
def decodeStreamData(stream):
|
def decodeStreamData(stream):
|
||||||
from .generic import NameObject
|
from .generic import NameObject
|
||||||
filters = stream.get("/Filter", ())
|
filters = stream.get("/Filter", ())
|
||||||
|
@ -306,22 +339,24 @@ def decodeStreamData(stream):
|
||||||
# we have a single filter instance
|
# we have a single filter instance
|
||||||
filters = (filters,)
|
filters = (filters,)
|
||||||
data = stream._data
|
data = stream._data
|
||||||
for filterType in filters:
|
# If there is not data to decode we should not try to decode the data.
|
||||||
if filterType == "/FlateDecode":
|
if data:
|
||||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
for filterType in filters:
|
||||||
elif filterType == "/ASCIIHexDecode":
|
if filterType == "/FlateDecode" or filterType == "/Fl":
|
||||||
data = ASCIIHexDecode.decode(data)
|
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
||||||
elif filterType == "/LZWDecode":
|
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
|
||||||
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
|
data = ASCIIHexDecode.decode(data)
|
||||||
elif filterType == "/ASCII85Decode":
|
elif filterType == "/LZWDecode" or filterType == "/LZW":
|
||||||
data = ASCII85Decode.decode(data)
|
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
|
||||||
elif filterType == "/Crypt":
|
elif filterType == "/ASCII85Decode" or filterType == "/A85":
|
||||||
decodeParams = stream.get("/DecodeParams", {})
|
data = ASCII85Decode.decode(data)
|
||||||
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
elif filterType == "/Crypt":
|
||||||
pass
|
decodeParams = stream.get("/DecodeParams", {})
|
||||||
|
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
# unsupported filter
|
||||||
else:
|
raise NotImplementedError("unsupported filter %s" % filterType)
|
||||||
# unsupported filter
|
|
||||||
raise NotImplementedError("unsupported filter %s" % filterType)
|
|
||||||
return data
|
return data
|
||||||
|
|
|
@ -43,11 +43,14 @@ from . import filters
|
||||||
from . import utils
|
from . import utils
|
||||||
import decimal
|
import decimal
|
||||||
import codecs
|
import codecs
|
||||||
|
import sys
|
||||||
#import debugging
|
#import debugging
|
||||||
|
|
||||||
ObjectPrefix = b_('/<[tf(n%')
|
ObjectPrefix = b_('/<[tf(n%')
|
||||||
NumberSigns = b_('+-')
|
NumberSigns = b_('+-')
|
||||||
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
|
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
|
||||||
|
|
||||||
|
|
||||||
def readObject(stream, pdf):
|
def readObject(stream, pdf):
|
||||||
tok = stream.read(1)
|
tok = stream.read(1)
|
||||||
stream.seek(-1, 1) # reset to start
|
stream.seek(-1, 1) # reset to start
|
||||||
|
@ -94,6 +97,7 @@ def readObject(stream, pdf):
|
||||||
else:
|
else:
|
||||||
return NumberObject.readFromStream(stream)
|
return NumberObject.readFromStream(stream)
|
||||||
|
|
||||||
|
|
||||||
class PdfObject(object):
|
class PdfObject(object):
|
||||||
def getObject(self):
|
def getObject(self):
|
||||||
"""Resolves indirect references."""
|
"""Resolves indirect references."""
|
||||||
|
@ -225,6 +229,7 @@ class FloatObject(decimal.Decimal, PdfObject):
|
||||||
return decimal.Decimal.__new__(cls, utils.str_(value), context)
|
return decimal.Decimal.__new__(cls, utils.str_(value), context)
|
||||||
except:
|
except:
|
||||||
return decimal.Decimal.__new__(cls, str(value))
|
return decimal.Decimal.__new__(cls, str(value))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self == self.to_integral():
|
if self == self.to_integral():
|
||||||
return str(self.quantize(decimal.Decimal(1)))
|
return str(self.quantize(decimal.Decimal(1)))
|
||||||
|
@ -244,7 +249,11 @@ class NumberObject(int, PdfObject):
|
||||||
ByteDot = b_(".")
|
ByteDot = b_(".")
|
||||||
|
|
||||||
def __new__(cls, value):
|
def __new__(cls, value):
|
||||||
return int.__new__(cls, value)
|
val = int(value)
|
||||||
|
try:
|
||||||
|
return int.__new__(cls, val)
|
||||||
|
except OverflowError:
|
||||||
|
return int.__new__(cls, 0)
|
||||||
|
|
||||||
def as_numeric(self):
|
def as_numeric(self):
|
||||||
return int(b_(repr(self)))
|
return int(b_(repr(self)))
|
||||||
|
@ -253,16 +262,7 @@ class NumberObject(int, PdfObject):
|
||||||
stream.write(b_(repr(self)))
|
stream.write(b_(repr(self)))
|
||||||
|
|
||||||
def readFromStream(stream):
|
def readFromStream(stream):
|
||||||
num = b_("")
|
num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
|
||||||
while True:
|
|
||||||
tok = stream.read(16)
|
|
||||||
m = NumberObject.NumberPattern.search(tok)
|
|
||||||
if m is not None:
|
|
||||||
stream.seek(m.start() - len(tok), 1)
|
|
||||||
num += tok[:m.start()]
|
|
||||||
break
|
|
||||||
|
|
||||||
num += tok
|
|
||||||
if num.find(NumberObject.ByteDot) != -1:
|
if num.find(NumberObject.ByteDot) != -1:
|
||||||
return FloatObject(num)
|
return FloatObject(num)
|
||||||
else:
|
else:
|
||||||
|
@ -345,13 +345,18 @@ def readStringFromStream(stream):
|
||||||
tok = b_("\b")
|
tok = b_("\b")
|
||||||
elif tok == b_("f"):
|
elif tok == b_("f"):
|
||||||
tok = b_("\f")
|
tok = b_("\f")
|
||||||
|
elif tok == b_("c"):
|
||||||
|
tok = b_("\c")
|
||||||
elif tok == b_("("):
|
elif tok == b_("("):
|
||||||
tok = b_("(")
|
tok = b_("(")
|
||||||
elif tok == b_(")"):
|
elif tok == b_(")"):
|
||||||
tok = b_(")")
|
tok = b_(")")
|
||||||
|
elif tok == b_("/"):
|
||||||
|
tok = b_("/")
|
||||||
elif tok == b_("\\"):
|
elif tok == b_("\\"):
|
||||||
tok = b_("\\")
|
tok = b_("\\")
|
||||||
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")):
|
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["),
|
||||||
|
b_("]"), b_("#"), b_("_"), b_("&"), b_('$')):
|
||||||
# odd/unnessecary escape sequences we have encountered
|
# odd/unnessecary escape sequences we have encountered
|
||||||
tok = b_(tok)
|
tok = b_(tok)
|
||||||
elif tok.isdigit():
|
elif tok.isdigit():
|
||||||
|
@ -378,7 +383,7 @@ def readStringFromStream(stream):
|
||||||
# line break was escaped:
|
# line break was escaped:
|
||||||
tok = b_('')
|
tok = b_('')
|
||||||
else:
|
else:
|
||||||
raise utils.PdfReadError("Unexpected escaped string")
|
raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
|
||||||
txt += tok
|
txt += tok
|
||||||
return createStringObject(txt)
|
return createStringObject(txt)
|
||||||
|
|
||||||
|
@ -456,7 +461,7 @@ class TextStringObject(utils.string_type, PdfObject):
|
||||||
|
|
||||||
|
|
||||||
class NameObject(str, PdfObject):
|
class NameObject(str, PdfObject):
|
||||||
delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
|
delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
|
||||||
surfix = b_("/")
|
surfix = b_("/")
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
def writeToStream(self, stream, encryption_key):
|
||||||
|
@ -468,11 +473,12 @@ class NameObject(str, PdfObject):
|
||||||
name = stream.read(1)
|
name = stream.read(1)
|
||||||
if name != NameObject.surfix:
|
if name != NameObject.surfix:
|
||||||
raise utils.PdfReadError("name read error")
|
raise utils.PdfReadError("name read error")
|
||||||
name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
|
name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
|
||||||
|
ignore_eof=True)
|
||||||
if debug: print(name)
|
if debug: print(name)
|
||||||
try:
|
try:
|
||||||
return NameObject(name.decode('utf-8'))
|
return NameObject(name.decode('utf-8'))
|
||||||
except UnicodeDecodeError as e:
|
except (UnicodeEncodeError, UnicodeDecodeError) as e:
|
||||||
# Name objects should represent irregular characters
|
# Name objects should represent irregular characters
|
||||||
# with a '#' followed by the symbol's hex number
|
# with a '#' followed by the symbol's hex number
|
||||||
if not pdf.strict:
|
if not pdf.strict:
|
||||||
|
@ -630,6 +636,7 @@ class DictionaryObject(dict, PdfObject):
|
||||||
return retval
|
return retval
|
||||||
readFromStream = staticmethod(readFromStream)
|
readFromStream = staticmethod(readFromStream)
|
||||||
|
|
||||||
|
|
||||||
class TreeObject(DictionaryObject):
|
class TreeObject(DictionaryObject):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
DictionaryObject.__init__(self)
|
DictionaryObject.__init__(self)
|
||||||
|
@ -726,7 +733,6 @@ class TreeObject(DictionaryObject):
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
prevRef = curRef
|
prevRef = curRef
|
||||||
prev = cur
|
prev = cur
|
||||||
if NameObject('/Next') in cur:
|
if NameObject('/Next') in cur:
|
||||||
|
@ -938,6 +944,7 @@ class RectangleObject(ArrayObject):
|
||||||
in (x,y) form.
|
in (x,y) form.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Field(TreeObject):
|
class Field(TreeObject):
|
||||||
"""
|
"""
|
||||||
A class representing a field dictionary. This class is accessed through
|
A class representing a field dictionary. This class is accessed through
|
||||||
|
@ -1009,6 +1016,7 @@ class Field(TreeObject):
|
||||||
See Section 8.5.2 of the PDF 1.7 reference.
|
See Section 8.5.2 of the PDF 1.7 reference.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Destination(TreeObject):
|
class Destination(TreeObject):
|
||||||
"""
|
"""
|
||||||
A class representing a destination within a PDF file.
|
A class representing a destination within a PDF file.
|
||||||
|
@ -1157,6 +1165,7 @@ def encode_pdfdocencoding(unicode_string):
|
||||||
"does not exist in translation table")
|
"does not exist in translation table")
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
def decode_pdfdocencoding(byte_array):
|
def decode_pdfdocencoding(byte_array):
|
||||||
retval = u_('')
|
retval = u_('')
|
||||||
for b in byte_array:
|
for b in byte_array:
|
||||||
|
@ -1211,4 +1220,3 @@ for i in range(256):
|
||||||
continue
|
continue
|
||||||
assert char not in _pdfDocEncoding_rev
|
assert char not in _pdfDocEncoding_rev
|
||||||
_pdfDocEncoding_rev[char] = i
|
_pdfDocEncoding_rev[char] = i
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
from .generic import *
|
from .generic import *
|
||||||
from .utils import string_type
|
from .utils import isString, str_
|
||||||
from .pdf import PdfFileReader, PdfFileWriter
|
from .pdf import PdfFileReader, PdfFileWriter
|
||||||
from .pagerange import PageRange
|
from .pagerange import PageRange
|
||||||
from sys import version_info
|
from sys import version_info
|
||||||
|
@ -40,6 +40,7 @@ else:
|
||||||
from io import FileIO as file
|
from io import FileIO as file
|
||||||
StreamIO = BytesIO
|
StreamIO = BytesIO
|
||||||
|
|
||||||
|
|
||||||
class _MergedPage(object):
|
class _MergedPage(object):
|
||||||
"""
|
"""
|
||||||
_MergedPage is used internally by PdfFileMerger to collect necessary
|
_MergedPage is used internally by PdfFileMerger to collect necessary
|
||||||
|
@ -50,13 +51,14 @@ class _MergedPage(object):
|
||||||
self.pagedata = pagedata
|
self.pagedata = pagedata
|
||||||
self.out_pagedata = None
|
self.out_pagedata = None
|
||||||
self.id = id
|
self.id = id
|
||||||
|
|
||||||
|
|
||||||
class PdfFileMerger(object):
|
class PdfFileMerger(object):
|
||||||
"""
|
"""
|
||||||
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
|
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
|
||||||
into a single PDF. It can concatenate, slice, insert, or any combination
|
into a single PDF. It can concatenate, slice, insert, or any combination
|
||||||
of the above.
|
of the above.
|
||||||
|
|
||||||
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
|
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
|
||||||
and :meth:`write()<write>` for usage information.
|
and :meth:`write()<write>` for usage information.
|
||||||
|
|
||||||
|
@ -64,7 +66,7 @@ class PdfFileMerger(object):
|
||||||
problems and also causes some correctable problems to be fatal.
|
problems and also causes some correctable problems to be fatal.
|
||||||
Defaults to ``True``.
|
Defaults to ``True``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, strict=True):
|
def __init__(self, strict=True):
|
||||||
self.inputs = []
|
self.inputs = []
|
||||||
self.pages = []
|
self.pages = []
|
||||||
|
@ -73,7 +75,7 @@ class PdfFileMerger(object):
|
||||||
self.named_dests = []
|
self.named_dests = []
|
||||||
self.id_count = 0
|
self.id_count = 0
|
||||||
self.strict = strict
|
self.strict = strict
|
||||||
|
|
||||||
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||||
"""
|
"""
|
||||||
Merges the pages from the given file into the output file at the
|
Merges the pages from the given file into the output file at the
|
||||||
|
@ -85,29 +87,30 @@ class PdfFileMerger(object):
|
||||||
:param fileobj: A File Object or an object that supports the standard read
|
:param fileobj: A File Object or an object that supports the standard read
|
||||||
and seek methods similar to a File Object. Could also be a
|
and seek methods similar to a File Object. Could also be a
|
||||||
string representing a path to a PDF file.
|
string representing a path to a PDF file.
|
||||||
|
|
||||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||||
the beginning of the included file by supplying the text of the bookmark.
|
the beginning of the included file by supplying the text of the bookmark.
|
||||||
|
|
||||||
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
|
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
|
||||||
to merge only the specified range of pages from the source
|
to merge only the specified range of pages from the source
|
||||||
document into the output document.
|
document into the output document.
|
||||||
|
|
||||||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||||
from being imported by specifying this as ``False``.
|
from being imported by specifying this as ``False``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# This parameter is passed to self.inputs.append and means
|
# This parameter is passed to self.inputs.append and means
|
||||||
# that the stream used was created in this method.
|
# that the stream used was created in this method.
|
||||||
my_file = False
|
my_file = False
|
||||||
|
|
||||||
# If the fileobj parameter is a string, assume it is a path
|
# If the fileobj parameter is a string, assume it is a path
|
||||||
# and create a file object at that location. If it is a file,
|
# and create a file object at that location. If it is a file,
|
||||||
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
|
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
|
||||||
# it is a PdfFileReader, copy that reader's stream into a
|
# it is a PdfFileReader, copy that reader's stream into a
|
||||||
# BytesIO (or StreamIO) stream.
|
# BytesIO (or StreamIO) stream.
|
||||||
# If fileobj is none of the above types, it is not modified
|
# If fileobj is none of the above types, it is not modified
|
||||||
if type(fileobj) == string_type:
|
decryption_key = None
|
||||||
|
if isString(fileobj):
|
||||||
fileobj = file(fileobj, 'rb')
|
fileobj = file(fileobj, 'rb')
|
||||||
my_file = True
|
my_file = True
|
||||||
elif isinstance(fileobj, file):
|
elif isinstance(fileobj, file):
|
||||||
|
@ -116,17 +119,21 @@ class PdfFileMerger(object):
|
||||||
fileobj = StreamIO(filecontent)
|
fileobj = StreamIO(filecontent)
|
||||||
my_file = True
|
my_file = True
|
||||||
elif isinstance(fileobj, PdfFileReader):
|
elif isinstance(fileobj, PdfFileReader):
|
||||||
orig_tell = fileobj.stream.tell()
|
orig_tell = fileobj.stream.tell()
|
||||||
fileobj.stream.seek(0)
|
fileobj.stream.seek(0)
|
||||||
filecontent = StreamIO(fileobj.stream.read())
|
filecontent = StreamIO(fileobj.stream.read())
|
||||||
fileobj.stream.seek(orig_tell) # reset the stream to its original location
|
fileobj.stream.seek(orig_tell) # reset the stream to its original location
|
||||||
fileobj = filecontent
|
fileobj = filecontent
|
||||||
|
if hasattr(fileobj, '_decryption_key'):
|
||||||
|
decryption_key = fileobj._decryption_key
|
||||||
my_file = True
|
my_file = True
|
||||||
|
|
||||||
# Create a new PdfFileReader instance using the stream
|
# Create a new PdfFileReader instance using the stream
|
||||||
# (either file or BytesIO or StringIO) created above
|
# (either file or BytesIO or StringIO) created above
|
||||||
pdfr = PdfFileReader(fileobj, strict=self.strict)
|
pdfr = PdfFileReader(fileobj, strict=self.strict)
|
||||||
|
if decryption_key is not None:
|
||||||
|
pdfr._decryption_key = decryption_key
|
||||||
|
|
||||||
# Find the range of pages to merge.
|
# Find the range of pages to merge.
|
||||||
if pages == None:
|
if pages == None:
|
||||||
pages = (0, pdfr.getNumPages())
|
pages = (0, pdfr.getNumPages())
|
||||||
|
@ -134,47 +141,45 @@ class PdfFileMerger(object):
|
||||||
pages = pages.indices(pdfr.getNumPages())
|
pages = pages.indices(pdfr.getNumPages())
|
||||||
elif not isinstance(pages, tuple):
|
elif not isinstance(pages, tuple):
|
||||||
raise TypeError('"pages" must be a tuple of (start, stop[, step])')
|
raise TypeError('"pages" must be a tuple of (start, stop[, step])')
|
||||||
|
|
||||||
srcpages = []
|
srcpages = []
|
||||||
if bookmark:
|
if bookmark:
|
||||||
bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
|
bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
|
||||||
|
|
||||||
outline = []
|
outline = []
|
||||||
if import_bookmarks:
|
if import_bookmarks:
|
||||||
outline = pdfr.getOutlines()
|
outline = pdfr.getOutlines()
|
||||||
outline = self._trim_outline(pdfr, outline, pages)
|
outline = self._trim_outline(pdfr, outline, pages)
|
||||||
|
|
||||||
if bookmark:
|
if bookmark:
|
||||||
self.bookmarks += [bookmark, outline]
|
self.bookmarks += [bookmark, outline]
|
||||||
else:
|
else:
|
||||||
self.bookmarks += outline
|
self.bookmarks += outline
|
||||||
|
|
||||||
dests = pdfr.namedDestinations
|
dests = pdfr.namedDestinations
|
||||||
dests = self._trim_dests(pdfr, dests, pages)
|
dests = self._trim_dests(pdfr, dests, pages)
|
||||||
self.named_dests += dests
|
self.named_dests += dests
|
||||||
|
|
||||||
# Gather all the pages that are going to be merged
|
# Gather all the pages that are going to be merged
|
||||||
for i in range(*pages):
|
for i in range(*pages):
|
||||||
pg = pdfr.getPage(i)
|
pg = pdfr.getPage(i)
|
||||||
|
|
||||||
id = self.id_count
|
id = self.id_count
|
||||||
self.id_count += 1
|
self.id_count += 1
|
||||||
|
|
||||||
mp = _MergedPage(pg, pdfr, id)
|
mp = _MergedPage(pg, pdfr, id)
|
||||||
|
|
||||||
srcpages.append(mp)
|
srcpages.append(mp)
|
||||||
|
|
||||||
self._associate_dests_to_pages(srcpages)
|
self._associate_dests_to_pages(srcpages)
|
||||||
self._associate_bookmarks_to_pages(srcpages)
|
self._associate_bookmarks_to_pages(srcpages)
|
||||||
|
|
||||||
|
|
||||||
# Slice to insert the pages at the specified position
|
# Slice to insert the pages at the specified position
|
||||||
self.pages[position:position] = srcpages
|
self.pages[position:position] = srcpages
|
||||||
|
|
||||||
# Keep track of our input files so we can close them later
|
# Keep track of our input files so we can close them later
|
||||||
self.inputs.append((fileobj, pdfr, my_file))
|
self.inputs.append((fileobj, pdfr, my_file))
|
||||||
|
|
||||||
|
|
||||||
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||||
"""
|
"""
|
||||||
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
|
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
|
||||||
|
@ -183,7 +188,7 @@ class PdfFileMerger(object):
|
||||||
:param fileobj: A File Object or an object that supports the standard read
|
:param fileobj: A File Object or an object that supports the standard read
|
||||||
and seek methods similar to a File Object. Could also be a
|
and seek methods similar to a File Object. Could also be a
|
||||||
string representing a path to a PDF file.
|
string representing a path to a PDF file.
|
||||||
|
|
||||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||||
the beginning of the included file by supplying the text of the bookmark.
|
the beginning of the included file by supplying the text of the bookmark.
|
||||||
|
|
||||||
|
@ -194,10 +199,9 @@ class PdfFileMerger(object):
|
||||||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||||
from being imported by specifying this as ``False``.
|
from being imported by specifying this as ``False``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
|
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
|
||||||
|
|
||||||
|
|
||||||
def write(self, fileobj):
|
def write(self, fileobj):
|
||||||
"""
|
"""
|
||||||
Writes all data that has been merged to the given output file.
|
Writes all data that has been merged to the given output file.
|
||||||
|
@ -206,11 +210,10 @@ class PdfFileMerger(object):
|
||||||
file-like object.
|
file-like object.
|
||||||
"""
|
"""
|
||||||
my_file = False
|
my_file = False
|
||||||
if type(fileobj) in (str, str):
|
if isString(fileobj):
|
||||||
fileobj = file(fileobj, 'wb')
|
fileobj = file(fileobj, 'wb')
|
||||||
my_file = True
|
my_file = True
|
||||||
|
|
||||||
|
|
||||||
# Add pages to the PdfFileWriter
|
# Add pages to the PdfFileWriter
|
||||||
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
|
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
|
||||||
for page in self.pages:
|
for page in self.pages:
|
||||||
|
@ -222,15 +225,13 @@ class PdfFileMerger(object):
|
||||||
# Once all pages are added, create bookmarks to point at those pages
|
# Once all pages are added, create bookmarks to point at those pages
|
||||||
self._write_dests()
|
self._write_dests()
|
||||||
self._write_bookmarks()
|
self._write_bookmarks()
|
||||||
|
|
||||||
# Write the output to the file
|
# Write the output to the file
|
||||||
self.output.write(fileobj)
|
self.output.write(fileobj)
|
||||||
|
|
||||||
if my_file:
|
if my_file:
|
||||||
fileobj.close()
|
fileobj.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""
|
"""
|
||||||
Shuts all file descriptors (input and output) and clears all memory
|
Shuts all file descriptors (input and output) and clears all memory
|
||||||
|
@ -240,7 +241,7 @@ class PdfFileMerger(object):
|
||||||
for fo, pdfr, mine in self.inputs:
|
for fo, pdfr, mine in self.inputs:
|
||||||
if mine:
|
if mine:
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|
||||||
self.inputs = []
|
self.inputs = []
|
||||||
self.output = None
|
self.output = None
|
||||||
|
|
||||||
|
@ -253,7 +254,7 @@ class PdfFileMerger(object):
|
||||||
Example: ``{u'/Title': u'My title'}``
|
Example: ``{u'/Title': u'My title'}``
|
||||||
"""
|
"""
|
||||||
self.output.addMetadata(infos)
|
self.output.addMetadata(infos)
|
||||||
|
|
||||||
def setPageLayout(self, layout):
|
def setPageLayout(self, layout):
|
||||||
"""
|
"""
|
||||||
Set the page layout
|
Set the page layout
|
||||||
|
@ -289,7 +290,7 @@ class PdfFileMerger(object):
|
||||||
|
|
||||||
def _trim_dests(self, pdf, dests, pages):
|
def _trim_dests(self, pdf, dests, pages):
|
||||||
"""
|
"""
|
||||||
Removes any named destinations that are not a part of the specified
|
Removes any named destinations that are not a part of the specified
|
||||||
page set.
|
page set.
|
||||||
"""
|
"""
|
||||||
new_dests = []
|
new_dests = []
|
||||||
|
@ -298,14 +299,14 @@ class PdfFileMerger(object):
|
||||||
for j in range(*pages):
|
for j in range(*pages):
|
||||||
if pdf.getPage(j).getObject() == o['/Page'].getObject():
|
if pdf.getPage(j).getObject() == o['/Page'].getObject():
|
||||||
o[NameObject('/Page')] = o['/Page'].getObject()
|
o[NameObject('/Page')] = o['/Page'].getObject()
|
||||||
assert str(k) == str(o['/Title'])
|
assert str_(k) == str_(o['/Title'])
|
||||||
new_dests.append(o)
|
new_dests.append(o)
|
||||||
break
|
break
|
||||||
return new_dests
|
return new_dests
|
||||||
|
|
||||||
def _trim_outline(self, pdf, outline, pages):
|
def _trim_outline(self, pdf, outline, pages):
|
||||||
"""
|
"""
|
||||||
Removes any outline/bookmark entries that are not a part of the
|
Removes any outline/bookmark entries that are not a part of the
|
||||||
specified page set.
|
specified page set.
|
||||||
"""
|
"""
|
||||||
new_outline = []
|
new_outline = []
|
||||||
|
@ -326,10 +327,10 @@ class PdfFileMerger(object):
|
||||||
prev_header_added = True
|
prev_header_added = True
|
||||||
break
|
break
|
||||||
return new_outline
|
return new_outline
|
||||||
|
|
||||||
def _write_dests(self):
|
def _write_dests(self):
|
||||||
dests = self.named_dests
|
dests = self.named_dests
|
||||||
|
|
||||||
for v in dests:
|
for v in dests:
|
||||||
pageno = None
|
pageno = None
|
||||||
pdf = None
|
pdf = None
|
||||||
|
@ -342,19 +343,18 @@ class PdfFileMerger(object):
|
||||||
break
|
break
|
||||||
if pageno != None:
|
if pageno != None:
|
||||||
self.output.addNamedDestinationObject(v)
|
self.output.addNamedDestinationObject(v)
|
||||||
|
|
||||||
def _write_bookmarks(self, bookmarks=None, parent=None):
|
def _write_bookmarks(self, bookmarks=None, parent=None):
|
||||||
|
|
||||||
if bookmarks == None:
|
if bookmarks == None:
|
||||||
bookmarks = self.bookmarks
|
bookmarks = self.bookmarks
|
||||||
|
|
||||||
|
|
||||||
last_added = None
|
last_added = None
|
||||||
for b in bookmarks:
|
for b in bookmarks:
|
||||||
if isinstance(b, list):
|
if isinstance(b, list):
|
||||||
self._write_bookmarks(b, last_added)
|
self._write_bookmarks(b, last_added)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pageno = None
|
pageno = None
|
||||||
pdf = None
|
pdf = None
|
||||||
if '/Page' in b:
|
if '/Page' in b:
|
||||||
|
@ -410,31 +410,31 @@ class PdfFileMerger(object):
|
||||||
del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
|
del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
|
||||||
|
|
||||||
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
|
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
|
||||||
|
|
||||||
pageno = i
|
pageno = i
|
||||||
pdf = p.src
|
pdf = p.src
|
||||||
break
|
break
|
||||||
if pageno != None:
|
if pageno != None:
|
||||||
del b['/Page'], b['/Type']
|
del b['/Page'], b['/Type']
|
||||||
last_added = self.output.addBookmarkDict(b, parent)
|
last_added = self.output.addBookmarkDict(b, parent)
|
||||||
|
|
||||||
def _associate_dests_to_pages(self, pages):
|
def _associate_dests_to_pages(self, pages):
|
||||||
for nd in self.named_dests:
|
for nd in self.named_dests:
|
||||||
pageno = None
|
pageno = None
|
||||||
np = nd['/Page']
|
np = nd['/Page']
|
||||||
|
|
||||||
if isinstance(np, NumberObject):
|
if isinstance(np, NumberObject):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for p in pages:
|
for p in pages:
|
||||||
if np.getObject() == p.pagedata.getObject():
|
if np.getObject() == p.pagedata.getObject():
|
||||||
pageno = p.id
|
pageno = p.id
|
||||||
|
|
||||||
if pageno != None:
|
if pageno != None:
|
||||||
nd[NameObject('/Page')] = NumberObject(pageno)
|
nd[NameObject('/Page')] = NumberObject(pageno)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
|
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
|
||||||
|
|
||||||
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
|
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
|
||||||
if bookmarks == None:
|
if bookmarks == None:
|
||||||
bookmarks = self.bookmarks
|
bookmarks = self.bookmarks
|
||||||
|
@ -443,35 +443,35 @@ class PdfFileMerger(object):
|
||||||
if isinstance(b, list):
|
if isinstance(b, list):
|
||||||
self._associate_bookmarks_to_pages(pages, b)
|
self._associate_bookmarks_to_pages(pages, b)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pageno = None
|
pageno = None
|
||||||
bp = b['/Page']
|
bp = b['/Page']
|
||||||
|
|
||||||
if isinstance(bp, NumberObject):
|
if isinstance(bp, NumberObject):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for p in pages:
|
for p in pages:
|
||||||
if bp.getObject() == p.pagedata.getObject():
|
if bp.getObject() == p.pagedata.getObject():
|
||||||
pageno = p.id
|
pageno = p.id
|
||||||
|
|
||||||
if pageno != None:
|
if pageno != None:
|
||||||
b[NameObject('/Page')] = NumberObject(pageno)
|
b[NameObject('/Page')] = NumberObject(pageno)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
|
raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
|
||||||
|
|
||||||
def findBookmark(self, bookmark, root=None):
|
def findBookmark(self, bookmark, root=None):
|
||||||
if root == None:
|
if root == None:
|
||||||
root = self.bookmarks
|
root = self.bookmarks
|
||||||
|
|
||||||
for i, b in enumerate(root):
|
for i, b in enumerate(root):
|
||||||
if isinstance(b, list):
|
if isinstance(b, list):
|
||||||
res = self.findBookmark(bookmark, b)
|
res = self.findBookmark(bookmark, b)
|
||||||
if res:
|
if res:
|
||||||
return [i] + res
|
return [i] + res
|
||||||
elif b == bookmark or b['/Title'] == bookmark:
|
elif b == bookmark or b['/Title'] == bookmark:
|
||||||
return [i]
|
return [i]
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def addBookmark(self, title, pagenum, parent=None):
|
def addBookmark(self, title, pagenum, parent=None):
|
||||||
"""
|
"""
|
||||||
|
@ -483,28 +483,27 @@ class PdfFileMerger(object):
|
||||||
bookmarks.
|
bookmarks.
|
||||||
"""
|
"""
|
||||||
if parent == None:
|
if parent == None:
|
||||||
iloc = [len(self.bookmarks)-1]
|
iloc = [len(self.bookmarks)-1]
|
||||||
elif isinstance(parent, list):
|
elif isinstance(parent, list):
|
||||||
iloc = parent
|
iloc = parent
|
||||||
else:
|
else:
|
||||||
iloc = self.findBookmark(parent)
|
iloc = self.findBookmark(parent)
|
||||||
|
|
||||||
dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||||
|
|
||||||
if parent == None:
|
if parent == None:
|
||||||
self.bookmarks.append(dest)
|
self.bookmarks.append(dest)
|
||||||
else:
|
else:
|
||||||
bmparent = self.bookmarks
|
bmparent = self.bookmarks
|
||||||
for i in iloc[:-1]:
|
for i in iloc[:-1]:
|
||||||
bmparent = bmparent[i]
|
bmparent = bmparent[i]
|
||||||
npos = iloc[-1]+1
|
npos = iloc[-1]+1
|
||||||
if npos < len(bmparent) and isinstance(bmparent[npos], list):
|
if npos < len(bmparent) and isinstance(bmparent[npos], list):
|
||||||
bmparent[npos].append(dest)
|
bmparent[npos].append(dest)
|
||||||
else:
|
else:
|
||||||
bmparent.insert(npos, [dest])
|
bmparent.insert(npos, [dest])
|
||||||
return dest
|
return dest
|
||||||
|
|
||||||
|
|
||||||
def addNamedDestination(self, title, pagenum):
|
def addNamedDestination(self, title, pagenum):
|
||||||
"""
|
"""
|
||||||
Add a destination to the output.
|
Add a destination to the output.
|
||||||
|
@ -512,7 +511,7 @@ class PdfFileMerger(object):
|
||||||
:param str title: Title to use
|
:param str title: Title to use
|
||||||
:param int pagenum: Page number this destination points at.
|
:param int pagenum: Page number this destination points at.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||||
self.named_dests.append(dest)
|
self.named_dests.append(dest)
|
||||||
|
|
||||||
|
@ -523,12 +522,12 @@ class OutlinesObject(list):
|
||||||
self.tree = tree
|
self.tree = tree
|
||||||
self.pdf = pdf
|
self.pdf = pdf
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
def remove(self, index):
|
def remove(self, index):
|
||||||
obj = self[index]
|
obj = self[index]
|
||||||
del self[index]
|
del self[index]
|
||||||
self.tree.removeChild(obj)
|
self.tree.removeChild(obj)
|
||||||
|
|
||||||
def add(self, title, pagenum):
|
def add(self, title, pagenum):
|
||||||
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
|
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
|
||||||
action = DictionaryObject()
|
action = DictionaryObject()
|
||||||
|
@ -547,7 +546,7 @@ class OutlinesObject(list):
|
||||||
self.pdf._addObject(bookmark)
|
self.pdf._addObject(bookmark)
|
||||||
|
|
||||||
self.tree.addChild(bookmark)
|
self.tree.addChild(bookmark)
|
||||||
|
|
||||||
def removeAll(self):
|
def removeAll(self):
|
||||||
for child in [x for x in self.tree.children()]:
|
for child in [x for x in self.tree.children()]:
|
||||||
self.tree.removeChild(child)
|
self.tree.removeChild(child)
|
||||||
|
|
|
@ -8,7 +8,7 @@ see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from .utils import Str
|
from .utils import isString
|
||||||
|
|
||||||
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
|
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
|
||||||
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
|
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
|
||||||
|
@ -32,11 +32,11 @@ PAGE_RANGE_HELP = """Remember, page indices start with zero.
|
||||||
::-1 all pages in reverse order.
|
::-1 all pages in reverse order.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class PageRange(object):
|
class PageRange(object):
|
||||||
"""
|
"""
|
||||||
A slice-like representation of a range of page indices,
|
A slice-like representation of a range of page indices,
|
||||||
i.e. page numbers, only starting at zero.
|
i.e. page numbers, only starting at zero.
|
||||||
The syntax is like what you would put between brackets [ ].
|
The syntax is like what you would put between brackets [ ].
|
||||||
The slice is one of the few Python types that can't be subclassed,
|
The slice is one of the few Python types that can't be subclassed,
|
||||||
but this class converts to and from slices, and allows similar use.
|
but this class converts to and from slices, and allows similar use.
|
||||||
|
@ -46,7 +46,7 @@ class PageRange(object):
|
||||||
o str() and repr() allow printing.
|
o str() and repr() allow printing.
|
||||||
o indices(n) is like slice.indices(n).
|
o indices(n) is like slice.indices(n).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, arg):
|
def __init__(self, arg):
|
||||||
"""
|
"""
|
||||||
Initialize with either a slice -- giving the equivalent page range,
|
Initialize with either a slice -- giving the equivalent page range,
|
||||||
|
@ -67,8 +67,8 @@ class PageRange(object):
|
||||||
if isinstance(arg, PageRange):
|
if isinstance(arg, PageRange):
|
||||||
self._slice = arg.to_slice()
|
self._slice = arg.to_slice()
|
||||||
return
|
return
|
||||||
|
|
||||||
m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
|
m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
|
||||||
if not m:
|
if not m:
|
||||||
raise ParseError(arg)
|
raise ParseError(arg)
|
||||||
elif m.group(2):
|
elif m.group(2):
|
||||||
|
@ -77,25 +77,25 @@ class PageRange(object):
|
||||||
stop = start + 1 if start != -1 else None
|
stop = start + 1 if start != -1 else None
|
||||||
self._slice = slice(start, stop)
|
self._slice = slice(start, stop)
|
||||||
else:
|
else:
|
||||||
self._slice = slice(*[int(g) if g else None
|
self._slice = slice(*[int(g) if g else None
|
||||||
for g in m.group(4, 6, 8)])
|
for g in m.group(4, 6, 8)])
|
||||||
|
|
||||||
# Just formatting this when there is __doc__ for __init__
|
# Just formatting this when there is __doc__ for __init__
|
||||||
if __init__.__doc__:
|
if __init__.__doc__:
|
||||||
__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
|
__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def valid(input):
|
def valid(input):
|
||||||
""" True if input is a valid initializer for a PageRange. """
|
""" True if input is a valid initializer for a PageRange. """
|
||||||
return isinstance(input, slice) or \
|
return isinstance(input, slice) or \
|
||||||
isinstance(input, PageRange) or \
|
isinstance(input, PageRange) or \
|
||||||
(isinstance(input, Str)
|
(isString(input)
|
||||||
and bool(re.match(PAGE_RANGE_RE, input)))
|
and bool(re.match(PAGE_RANGE_RE, input)))
|
||||||
|
|
||||||
def to_slice(self):
|
def to_slice(self):
|
||||||
""" Return the slice equivalent of this page range. """
|
""" Return the slice equivalent of this page range. """
|
||||||
return self._slice
|
return self._slice
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
""" A string like "1:2:3". """
|
""" A string like "1:2:3". """
|
||||||
s = self._slice
|
s = self._slice
|
||||||
|
@ -127,7 +127,7 @@ def parse_filename_page_ranges(args):
|
||||||
"""
|
"""
|
||||||
Given a list of filenames and page ranges, return a list of
|
Given a list of filenames and page ranges, return a list of
|
||||||
(filename, page_range) pairs.
|
(filename, page_range) pairs.
|
||||||
First arg must be a filename; other ags are filenames, page-range
|
First arg must be a filename; other ags are filenames, page-range
|
||||||
expressions, slice objects, or PageRange objects.
|
expressions, slice objects, or PageRange objects.
|
||||||
A filename not followed by a page range indicates all pages of the file.
|
A filename not followed by a page range indicates all pages of the file.
|
||||||
"""
|
"""
|
||||||
|
@ -146,7 +146,7 @@ def parse_filename_page_ranges(args):
|
||||||
# New filename or end of list--do all of the previous file?
|
# New filename or end of list--do all of the previous file?
|
||||||
if pdf_filename and not did_page_range:
|
if pdf_filename and not did_page_range:
|
||||||
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
|
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
|
||||||
|
|
||||||
pdf_filename = arg
|
pdf_filename = arg
|
||||||
did_page_range = False
|
did_page_range = False
|
||||||
return pairs
|
return pairs
|
||||||
|
|
|
@ -63,7 +63,7 @@ import warnings
|
||||||
import codecs
|
import codecs
|
||||||
from .generic import *
|
from .generic import *
|
||||||
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
||||||
from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning
|
from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning
|
||||||
|
|
||||||
if version_info < ( 2, 4 ):
|
if version_info < ( 2, 4 ):
|
||||||
from sets import ImmutableSet as frozenset
|
from sets import ImmutableSet as frozenset
|
||||||
|
@ -74,6 +74,7 @@ else:
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
class PdfFileWriter(object):
|
class PdfFileWriter(object):
|
||||||
"""
|
"""
|
||||||
This class supports writing PDF files out, given pages produced by another
|
This class supports writing PDF files out, given pages produced by another
|
||||||
|
@ -228,6 +229,157 @@ class PdfFileWriter(object):
|
||||||
NameObject("/OpenAction"): self._addObject(js)
|
NameObject("/OpenAction"): self._addObject(js)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def addAttachment(self, fname, fdata):
|
||||||
|
"""
|
||||||
|
Embed a file inside the PDF.
|
||||||
|
|
||||||
|
:param str fname: The filename to display.
|
||||||
|
:param str fdata: The data in the file.
|
||||||
|
|
||||||
|
Reference:
|
||||||
|
https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
|
||||||
|
Section 7.11.3
|
||||||
|
"""
|
||||||
|
|
||||||
|
# We need 3 entries:
|
||||||
|
# * The file's data
|
||||||
|
# * The /Filespec entry
|
||||||
|
# * The file's name, which goes in the Catalog
|
||||||
|
|
||||||
|
|
||||||
|
# The entry for the file
|
||||||
|
""" Sample:
|
||||||
|
8 0 obj
|
||||||
|
<<
|
||||||
|
/Length 12
|
||||||
|
/Type /EmbeddedFile
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
Hello world!
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
"""
|
||||||
|
file_entry = DecodedStreamObject()
|
||||||
|
file_entry.setData(fdata)
|
||||||
|
file_entry.update({
|
||||||
|
NameObject("/Type"): NameObject("/EmbeddedFile")
|
||||||
|
})
|
||||||
|
|
||||||
|
# The Filespec entry
|
||||||
|
""" Sample:
|
||||||
|
7 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Filespec
|
||||||
|
/F (hello.txt)
|
||||||
|
/EF << /F 8 0 R >>
|
||||||
|
>>
|
||||||
|
"""
|
||||||
|
efEntry = DictionaryObject()
|
||||||
|
efEntry.update({ NameObject("/F"):file_entry })
|
||||||
|
|
||||||
|
filespec = DictionaryObject()
|
||||||
|
filespec.update({
|
||||||
|
NameObject("/Type"): NameObject("/Filespec"),
|
||||||
|
NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject
|
||||||
|
NameObject("/EF"): efEntry
|
||||||
|
})
|
||||||
|
|
||||||
|
# Then create the entry for the root, as it needs a reference to the Filespec
|
||||||
|
""" Sample:
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Catalog
|
||||||
|
/Outlines 2 0 R
|
||||||
|
/Pages 3 0 R
|
||||||
|
/Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
"""
|
||||||
|
embeddedFilesNamesDictionary = DictionaryObject()
|
||||||
|
embeddedFilesNamesDictionary.update({
|
||||||
|
NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
|
||||||
|
})
|
||||||
|
|
||||||
|
embeddedFilesDictionary = DictionaryObject()
|
||||||
|
embeddedFilesDictionary.update({
|
||||||
|
NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
|
||||||
|
})
|
||||||
|
# Update the root
|
||||||
|
self._root_object.update({
|
||||||
|
NameObject("/Names"): embeddedFilesDictionary
|
||||||
|
})
|
||||||
|
|
||||||
|
def appendPagesFromReader(self, reader, after_page_append=None):
|
||||||
|
"""
|
||||||
|
Copy pages from reader to writer. Includes an optional callback parameter
|
||||||
|
which is invoked after pages are appended to the writer.
|
||||||
|
|
||||||
|
:param reader: a PdfFileReader object from which to copy page
|
||||||
|
annotations to this writer object. The writer's annots
|
||||||
|
will then be updated
|
||||||
|
:callback after_page_append (function): Callback function that is invoked after
|
||||||
|
each page is appended to the writer. Callback signature:
|
||||||
|
|
||||||
|
:param writer_pageref (PDF page reference): Reference to the page
|
||||||
|
appended to the writer.
|
||||||
|
"""
|
||||||
|
# Get page count from writer and reader
|
||||||
|
reader_num_pages = reader.getNumPages()
|
||||||
|
writer_num_pages = self.getNumPages()
|
||||||
|
|
||||||
|
# Copy pages from reader to writer
|
||||||
|
for rpagenum in range(0, reader_num_pages):
|
||||||
|
reader_page = reader.getPage(rpagenum)
|
||||||
|
self.addPage(reader_page)
|
||||||
|
writer_page = self.getPage(writer_num_pages+rpagenum)
|
||||||
|
# Trigger callback, pass writer page as parameter
|
||||||
|
if callable(after_page_append): after_page_append(writer_page)
|
||||||
|
|
||||||
|
def updatePageFormFieldValues(self, page, fields):
|
||||||
|
'''
|
||||||
|
Update the form field values for a given page from a fields dictionary.
|
||||||
|
Copy field texts and values from fields to page.
|
||||||
|
|
||||||
|
:param page: Page reference from PDF writer where the annotations
|
||||||
|
and field data will be updated.
|
||||||
|
:param fields: a Python dictionary of field names (/T) and text
|
||||||
|
values (/V)
|
||||||
|
'''
|
||||||
|
# Iterate through pages, update field values
|
||||||
|
for j in range(0, len(page['/Annots'])):
|
||||||
|
writer_annot = page['/Annots'][j].getObject()
|
||||||
|
for field in fields:
|
||||||
|
if writer_annot.get('/T') == field:
|
||||||
|
writer_annot.update({
|
||||||
|
NameObject("/V"): TextStringObject(fields[field])
|
||||||
|
})
|
||||||
|
|
||||||
|
def cloneReaderDocumentRoot(self, reader):
|
||||||
|
'''
|
||||||
|
Copy the reader document root to the writer.
|
||||||
|
|
||||||
|
:param reader: PdfFileReader from the document root should be copied.
|
||||||
|
:callback after_page_append
|
||||||
|
'''
|
||||||
|
self._root_object = reader.trailer['/Root']
|
||||||
|
|
||||||
|
def cloneDocumentFromReader(self, reader, after_page_append=None):
|
||||||
|
'''
|
||||||
|
Create a copy (clone) of a document from a PDF file reader
|
||||||
|
|
||||||
|
:param reader: PDF file reader instance from which the clone
|
||||||
|
should be created.
|
||||||
|
:callback after_page_append (function): Callback function that is invoked after
|
||||||
|
each page is appended to the writer. Signature includes a reference to the
|
||||||
|
appended page (delegates to appendPagesFromReader). Callback signature:
|
||||||
|
|
||||||
|
:param writer_pageref (PDF page reference): Reference to the page just
|
||||||
|
appended to the document.
|
||||||
|
'''
|
||||||
|
self.cloneReaderDocumentRoot(reader)
|
||||||
|
self.appendPagesFromReader(reader, after_page_append)
|
||||||
|
|
||||||
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
|
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
|
||||||
"""
|
"""
|
||||||
Encrypt this PDF file with the PDF Standard encryption handler.
|
Encrypt this PDF file with the PDF Standard encryption handler.
|
||||||
|
@ -516,7 +668,6 @@ class PdfFileWriter(object):
|
||||||
|
|
||||||
return bookmarkRef
|
return bookmarkRef
|
||||||
|
|
||||||
|
|
||||||
def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
|
def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
|
||||||
"""
|
"""
|
||||||
Add a bookmark to this PDF file.
|
Add a bookmark to this PDF file.
|
||||||
|
@ -553,7 +704,6 @@ class PdfFileWriter(object):
|
||||||
if parent == None:
|
if parent == None:
|
||||||
parent = outlineRef
|
parent = outlineRef
|
||||||
|
|
||||||
|
|
||||||
bookmark = TreeObject()
|
bookmark = TreeObject()
|
||||||
|
|
||||||
bookmark.update({
|
bookmark.update({
|
||||||
|
@ -759,7 +909,7 @@ class PdfFileWriter(object):
|
||||||
else:
|
else:
|
||||||
borderArr = [NumberObject(0)] * 3
|
borderArr = [NumberObject(0)] * 3
|
||||||
|
|
||||||
if isinstance(rect, Str):
|
if isString(rect):
|
||||||
rect = NameObject(rect)
|
rect = NameObject(rect)
|
||||||
elif isinstance(rect, RectangleObject):
|
elif isinstance(rect, RectangleObject):
|
||||||
pass
|
pass
|
||||||
|
@ -871,6 +1021,7 @@ class PdfFileWriter(object):
|
||||||
"""Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
|
"""Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
|
||||||
and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
|
and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
|
||||||
|
|
||||||
|
|
||||||
class PdfFileReader(object):
|
class PdfFileReader(object):
|
||||||
"""
|
"""
|
||||||
Initializes a PdfFileReader object. This operation can take some time, as
|
Initializes a PdfFileReader object. This operation can take some time, as
|
||||||
|
@ -904,9 +1055,10 @@ class PdfFileReader(object):
|
||||||
self.flattenedPages = None
|
self.flattenedPages = None
|
||||||
self.resolvedObjects = {}
|
self.resolvedObjects = {}
|
||||||
self.xrefIndex = 0
|
self.xrefIndex = 0
|
||||||
|
self._pageId2Num = None # map page IndirectRef number to Page Number
|
||||||
if hasattr(stream, 'mode') and 'b' not in stream.mode:
|
if hasattr(stream, 'mode') and 'b' not in stream.mode:
|
||||||
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
|
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
|
||||||
if type(stream) in (string_type, str):
|
if isString(stream):
|
||||||
fileobj = open(stream, 'rb')
|
fileobj = open(stream, 'rb')
|
||||||
stream = BytesIO(b_(fileobj.read()))
|
stream = BytesIO(b_(fileobj.read()))
|
||||||
fileobj.close()
|
fileobj.close()
|
||||||
|
@ -973,6 +1125,7 @@ class PdfFileReader(object):
|
||||||
if self.isEncrypted:
|
if self.isEncrypted:
|
||||||
try:
|
try:
|
||||||
self._override_encryption = True
|
self._override_encryption = True
|
||||||
|
self.decrypt('')
|
||||||
return self.trailer["/Root"]["/Pages"]["/Count"]
|
return self.trailer["/Root"]["/Pages"]["/Count"]
|
||||||
except:
|
except:
|
||||||
raise utils.PdfReadError("File has not been decrypted")
|
raise utils.PdfReadError("File has not been decrypted")
|
||||||
|
@ -1160,7 +1313,14 @@ class PdfFileReader(object):
|
||||||
|
|
||||||
# get the outline dictionary and named destinations
|
# get the outline dictionary and named destinations
|
||||||
if "/Outlines" in catalog:
|
if "/Outlines" in catalog:
|
||||||
lines = catalog["/Outlines"]
|
try:
|
||||||
|
lines = catalog["/Outlines"]
|
||||||
|
except utils.PdfReadError:
|
||||||
|
# this occurs if the /Outlines object reference is incorrect
|
||||||
|
# for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
|
||||||
|
# so continue to load the file without the Bookmarks
|
||||||
|
return outlines
|
||||||
|
|
||||||
if "/First" in lines:
|
if "/First" in lines:
|
||||||
node = lines["/First"]
|
node = lines["/First"]
|
||||||
self._namedDests = self.getNamedDestinations()
|
self._namedDests = self.getNamedDestinations()
|
||||||
|
@ -1187,6 +1347,49 @@ class PdfFileReader(object):
|
||||||
|
|
||||||
return outlines
|
return outlines
|
||||||
|
|
||||||
|
def _getPageNumberByIndirect(self, indirectRef):
|
||||||
|
"""Generate _pageId2Num"""
|
||||||
|
if self._pageId2Num is None:
|
||||||
|
id2num = {}
|
||||||
|
for i, x in enumerate(self.pages):
|
||||||
|
id2num[x.indirectRef.idnum] = i
|
||||||
|
self._pageId2Num = id2num
|
||||||
|
|
||||||
|
if isinstance(indirectRef, int):
|
||||||
|
idnum = indirectRef
|
||||||
|
else:
|
||||||
|
idnum = indirectRef.idnum
|
||||||
|
|
||||||
|
ret = self._pageId2Num.get(idnum, -1)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def getPageNumber(self, page):
|
||||||
|
"""
|
||||||
|
Retrieve page number of a given PageObject
|
||||||
|
|
||||||
|
:param PageObject page: The page to get page number. Should be
|
||||||
|
an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
|
||||||
|
:return: the page number or -1 if page not found
|
||||||
|
:rtype: int
|
||||||
|
"""
|
||||||
|
indirectRef = page.indirectRef
|
||||||
|
ret = self._getPageNumberByIndirect(indirectRef)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def getDestinationPageNumber(self, destination):
|
||||||
|
"""
|
||||||
|
Retrieve page number of a given Destination object
|
||||||
|
|
||||||
|
:param Destination destination: The destination to get page number.
|
||||||
|
Should be an instance of
|
||||||
|
:class:`Destination<PyPDF2.pdf.Destination>`
|
||||||
|
:return: the page number or -1 if page not found
|
||||||
|
:rtype: int
|
||||||
|
"""
|
||||||
|
indirectRef = destination.page
|
||||||
|
ret = self._getPageNumberByIndirect(indirectRef)
|
||||||
|
return ret
|
||||||
|
|
||||||
def _buildDestination(self, title, array):
|
def _buildDestination(self, title, array):
|
||||||
page, typ = array[0:2]
|
page, typ = array[0:2]
|
||||||
array = array[2:]
|
array = array[2:]
|
||||||
|
@ -1210,7 +1413,7 @@ class PdfFileReader(object):
|
||||||
if dest:
|
if dest:
|
||||||
if isinstance(dest, ArrayObject):
|
if isinstance(dest, ArrayObject):
|
||||||
outline = self._buildDestination(title, dest)
|
outline = self._buildDestination(title, dest)
|
||||||
elif isinstance(dest, Str) and dest in self._namedDests:
|
elif isString(dest) and dest in self._namedDests:
|
||||||
outline = self._namedDests[dest]
|
outline = self._namedDests[dest]
|
||||||
outline[NameObject("/Title")] = title
|
outline[NameObject("/Title")] = title
|
||||||
else:
|
else:
|
||||||
|
@ -1310,6 +1513,8 @@ class PdfFileReader(object):
|
||||||
assert idx < objStm['/N']
|
assert idx < objStm['/N']
|
||||||
streamData = BytesIO(b_(objStm.getData()))
|
streamData = BytesIO(b_(objStm.getData()))
|
||||||
for i in range(objStm['/N']):
|
for i in range(objStm['/N']):
|
||||||
|
readNonWhitespace(streamData)
|
||||||
|
streamData.seek(-1, 1)
|
||||||
objnum = NumberObject.readFromStream(streamData)
|
objnum = NumberObject.readFromStream(streamData)
|
||||||
readNonWhitespace(streamData)
|
readNonWhitespace(streamData)
|
||||||
streamData.seek(-1, 1)
|
streamData.seek(-1, 1)
|
||||||
|
@ -1347,7 +1552,6 @@ class PdfFileReader(object):
|
||||||
if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
|
if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
|
||||||
return NullObject()
|
return NullObject()
|
||||||
|
|
||||||
|
|
||||||
def getObject(self, indirectReference):
|
def getObject(self, indirectReference):
|
||||||
debug = False
|
debug = False
|
||||||
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
|
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
|
||||||
|
@ -1470,7 +1674,7 @@ class PdfFileReader(object):
|
||||||
startxref = int(line)
|
startxref = int(line)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# 'startxref' may be on the same line as the location
|
# 'startxref' may be on the same line as the location
|
||||||
if not line.startswith("startxref"):
|
if not line.startswith(b_("startxref")):
|
||||||
raise utils.PdfReadError("startxref not found")
|
raise utils.PdfReadError("startxref not found")
|
||||||
startxref = int(line[9:].strip())
|
startxref = int(line[9:].strip())
|
||||||
warnings.warn("startxref on same line as offset")
|
warnings.warn("startxref on same line as offset")
|
||||||
|
@ -1580,6 +1784,7 @@ class PdfFileReader(object):
|
||||||
assert len(entrySizes) >= 3
|
assert len(entrySizes) >= 3
|
||||||
if self.strict and len(entrySizes) > 3:
|
if self.strict and len(entrySizes) > 3:
|
||||||
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
|
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
|
||||||
|
|
||||||
def getEntry(i):
|
def getEntry(i):
|
||||||
# Reads the correct number of bytes for each entry. See the
|
# Reads the correct number of bytes for each entry. See the
|
||||||
# discussion of the W parameter in PDF spec table 17.
|
# discussion of the W parameter in PDF spec table 17.
|
||||||
|
@ -1664,8 +1869,7 @@ class PdfFileReader(object):
|
||||||
if found:
|
if found:
|
||||||
continue
|
continue
|
||||||
# no xref table found at specified location
|
# no xref table found at specified location
|
||||||
assert False
|
raise utils.PdfReadError("Could not find xref table at specified location")
|
||||||
break
|
|
||||||
#if not zero-indexed, verify that the table is correct; change it if necessary
|
#if not zero-indexed, verify that the table is correct; change it if necessary
|
||||||
if self.xrefIndex and not self.strict:
|
if self.xrefIndex and not self.strict:
|
||||||
loc = stream.tell()
|
loc = stream.tell()
|
||||||
|
@ -1683,7 +1887,6 @@ class PdfFileReader(object):
|
||||||
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
|
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
|
||||||
stream.seek(loc, 0) #return to where it was
|
stream.seek(loc, 0) #return to where it was
|
||||||
|
|
||||||
|
|
||||||
def _zeroXref(self, generation):
|
def _zeroXref(self, generation):
|
||||||
self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
|
self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
|
||||||
|
|
||||||
|
@ -1700,8 +1903,13 @@ class PdfFileReader(object):
|
||||||
if debug: print(">>readNextEndLine")
|
if debug: print(">>readNextEndLine")
|
||||||
line = b_("")
|
line = b_("")
|
||||||
while True:
|
while True:
|
||||||
|
# Prevent infinite loops in malformed PDFs
|
||||||
|
if stream.tell() == 0:
|
||||||
|
raise utils.PdfReadError("Could not read malformed PDF file")
|
||||||
x = stream.read(1)
|
x = stream.read(1)
|
||||||
if debug: print((" x:", x, "%x"%ord(x)))
|
if debug: print((" x:", x, "%x"%ord(x)))
|
||||||
|
if stream.tell() < 2:
|
||||||
|
raise utils.PdfReadError("EOL marker not found")
|
||||||
stream.seek(-2, 1)
|
stream.seek(-2, 1)
|
||||||
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
|
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
|
||||||
crlf = False
|
crlf = False
|
||||||
|
@ -1713,6 +1921,8 @@ class PdfFileReader(object):
|
||||||
if x == b_('\n') or x == b_('\r'): # account for CR+LF
|
if x == b_('\n') or x == b_('\r'): # account for CR+LF
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
crlf = True
|
crlf = True
|
||||||
|
if stream.tell() < 2:
|
||||||
|
raise utils.PdfReadError("EOL marker not found")
|
||||||
stream.seek(-2, 1)
|
stream.seek(-2, 1)
|
||||||
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
|
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
|
||||||
break
|
break
|
||||||
|
@ -1827,14 +2037,17 @@ def getRectangle(self, name, defaults):
|
||||||
setRectangle(self, name, retval)
|
setRectangle(self, name, retval)
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
def setRectangle(self, name, value):
|
def setRectangle(self, name, value):
|
||||||
if not isinstance(name, NameObject):
|
if not isinstance(name, NameObject):
|
||||||
name = NameObject(name)
|
name = NameObject(name)
|
||||||
self[name] = value
|
self[name] = value
|
||||||
|
|
||||||
|
|
||||||
def deleteRectangle(self, name):
|
def deleteRectangle(self, name):
|
||||||
del self[name]
|
del self[name]
|
||||||
|
|
||||||
|
|
||||||
def createRectangleAccessor(name, fallback):
|
def createRectangleAccessor(name, fallback):
|
||||||
return \
|
return \
|
||||||
property(
|
property(
|
||||||
|
@ -1843,6 +2056,7 @@ def createRectangleAccessor(name, fallback):
|
||||||
lambda self: deleteRectangle(self, name)
|
lambda self: deleteRectangle(self, name)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PageObject(DictionaryObject):
|
class PageObject(DictionaryObject):
|
||||||
"""
|
"""
|
||||||
This class represents a single page within a PDF file. Typically this
|
This class represents a single page within a PDF file. Typically this
|
||||||
|
@ -2374,6 +2588,7 @@ class PageObject(DictionaryObject):
|
||||||
for i in operands[0]:
|
for i in operands[0]:
|
||||||
if isinstance(i, TextStringObject):
|
if isinstance(i, TextStringObject):
|
||||||
text += i
|
text += i
|
||||||
|
text += "\n"
|
||||||
return text
|
return text
|
||||||
|
|
||||||
mediaBox = createRectangleAccessor("/MediaBox", ())
|
mediaBox = createRectangleAccessor("/MediaBox", ())
|
||||||
|
@ -2412,6 +2627,7 @@ class PageObject(DictionaryObject):
|
||||||
page's creator.
|
page's creator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class ContentStream(DecodedStreamObject):
|
class ContentStream(DecodedStreamObject):
|
||||||
def __init__(self, stream, pdf):
|
def __init__(self, stream, pdf):
|
||||||
self.pdf = pdf
|
self.pdf = pdf
|
||||||
|
@ -2437,25 +2653,25 @@ class ContentStream(DecodedStreamObject):
|
||||||
if peek == b_('') or ord_(peek) == 0:
|
if peek == b_('') or ord_(peek) == 0:
|
||||||
break
|
break
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
if peek.isalpha() or peek == "'" or peek == '"':
|
if peek.isalpha() or peek == b_("'") or peek == b_('"'):
|
||||||
operator = utils.readUntilRegex(stream,
|
operator = utils.readUntilRegex(stream,
|
||||||
NameObject.delimiterPattern, True)
|
NameObject.delimiterPattern, True)
|
||||||
if operator == "BI":
|
if operator == b_("BI"):
|
||||||
# begin inline image - a completely different parsing
|
# begin inline image - a completely different parsing
|
||||||
# mechanism is required, of course... thanks buddy...
|
# mechanism is required, of course... thanks buddy...
|
||||||
assert operands == []
|
assert operands == []
|
||||||
ii = self._readInlineImage(stream)
|
ii = self._readInlineImage(stream)
|
||||||
self.operations.append((ii, "INLINE IMAGE"))
|
self.operations.append((ii, b_("INLINE IMAGE")))
|
||||||
else:
|
else:
|
||||||
self.operations.append((operands, operator))
|
self.operations.append((operands, operator))
|
||||||
operands = []
|
operands = []
|
||||||
elif peek == '%':
|
elif peek == b_('%'):
|
||||||
# If we encounter a comment in the content stream, we have to
|
# If we encounter a comment in the content stream, we have to
|
||||||
# handle it here. Typically, readObject will handle
|
# handle it here. Typically, readObject will handle
|
||||||
# encountering a comment -- but readObject assumes that
|
# encountering a comment -- but readObject assumes that
|
||||||
# following the comment must be the object we're trying to
|
# following the comment must be the object we're trying to
|
||||||
# read. In this case, it could be an operator instead.
|
# read. In this case, it could be an operator instead.
|
||||||
while peek not in ('\r', '\n'):
|
while peek not in (b_('\r'), b_('\n')):
|
||||||
peek = stream.read(1)
|
peek = stream.read(1)
|
||||||
else:
|
else:
|
||||||
operands.append(readObject(stream, None))
|
operands.append(readObject(stream, None))
|
||||||
|
@ -2467,7 +2683,7 @@ class ContentStream(DecodedStreamObject):
|
||||||
while True:
|
while True:
|
||||||
tok = readNonWhitespace(stream)
|
tok = readNonWhitespace(stream)
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
if tok == "I":
|
if tok == b_("I"):
|
||||||
# "ID" - begin of image data
|
# "ID" - begin of image data
|
||||||
break
|
break
|
||||||
key = readObject(stream, self.pdf)
|
key = readObject(stream, self.pdf)
|
||||||
|
@ -2477,28 +2693,32 @@ class ContentStream(DecodedStreamObject):
|
||||||
settings[key] = value
|
settings[key] = value
|
||||||
# left at beginning of ID
|
# left at beginning of ID
|
||||||
tmp = stream.read(3)
|
tmp = stream.read(3)
|
||||||
assert tmp[:2] == "ID"
|
assert tmp[:2] == b_("ID")
|
||||||
data = ""
|
data = b_("")
|
||||||
while True:
|
while True:
|
||||||
|
# Read the inline image, while checking for EI (End Image) operator.
|
||||||
tok = stream.read(1)
|
tok = stream.read(1)
|
||||||
if tok == "E":
|
if tok == b_("E"):
|
||||||
# Check for End Image
|
# Check for End Image
|
||||||
next1 = stream.read(1)
|
tok2 = stream.read(1)
|
||||||
if next1 == "I":
|
if tok2 == b_("I"):
|
||||||
next2 = readNonWhitespace(stream)
|
# Sometimes that data will contain EI, so check for the Q operator.
|
||||||
if next2 == 'Q':
|
tok3 = stream.read(1)
|
||||||
|
info = tok + tok2
|
||||||
|
while tok3 in utils.WHITESPACES:
|
||||||
|
info += tok3
|
||||||
|
tok3 = stream.read(1)
|
||||||
|
if tok3 == b_("Q"):
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
stream.seek(-2, 1)
|
stream.seek(-1,1)
|
||||||
data += tok
|
data += info
|
||||||
else:
|
else:
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
data += tok
|
data += tok
|
||||||
else:
|
else:
|
||||||
data += tok
|
data += tok
|
||||||
x = readNonWhitespace(stream)
|
|
||||||
stream.seek(-1, 1)
|
|
||||||
return {"settings": settings, "data": data}
|
return {"settings": settings, "data": data}
|
||||||
|
|
||||||
def _getData(self):
|
def _getData(self):
|
||||||
|
@ -2525,6 +2745,7 @@ class ContentStream(DecodedStreamObject):
|
||||||
|
|
||||||
_data = property(_getData, _setData)
|
_data = property(_getData, _setData)
|
||||||
|
|
||||||
|
|
||||||
class DocumentInformation(DictionaryObject):
|
class DocumentInformation(DictionaryObject):
|
||||||
"""
|
"""
|
||||||
A class representing the basic document metadata provided in a PDF File.
|
A class representing the basic document metadata provided in a PDF File.
|
||||||
|
@ -2588,6 +2809,7 @@ class DocumentInformation(DictionaryObject):
|
||||||
producer_raw = property(lambda self: self.get("/Producer"))
|
producer_raw = property(lambda self: self.get("/Producer"))
|
||||||
"""The "raw" version of producer; can return a ``ByteStringObject``."""
|
"""The "raw" version of producer; can return a ``ByteStringObject``."""
|
||||||
|
|
||||||
|
|
||||||
def convertToInt(d, size):
|
def convertToInt(d, size):
|
||||||
if size > 8:
|
if size > 8:
|
||||||
raise utils.PdfReadError("invalid size in convertToInt")
|
raise utils.PdfReadError("invalid size in convertToInt")
|
||||||
|
@ -2600,6 +2822,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
|
||||||
b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
|
b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
|
||||||
b_('\xa9\xfe\x64\x53\x69\x7a')
|
b_('\xa9\xfe\x64\x53\x69\x7a')
|
||||||
|
|
||||||
|
|
||||||
# Implementation of algorithm 3.2 of the PDF standard security handler,
|
# Implementation of algorithm 3.2 of the PDF standard security handler,
|
||||||
# section 3.5.2 of the PDF 1.6 reference.
|
# section 3.5.2 of the PDF 1.6 reference.
|
||||||
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
|
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
|
||||||
|
@ -2643,6 +2866,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
||||||
# entry.
|
# entry.
|
||||||
return md5_hash[:keylen]
|
return md5_hash[:keylen]
|
||||||
|
|
||||||
|
|
||||||
# Implementation of algorithm 3.3 of the PDF standard security handler,
|
# Implementation of algorithm 3.3 of the PDF standard security handler,
|
||||||
# section 3.5.2 of the PDF 1.6 reference.
|
# section 3.5.2 of the PDF 1.6 reference.
|
||||||
def _alg33(owner_pwd, user_pwd, rev, keylen):
|
def _alg33(owner_pwd, user_pwd, rev, keylen):
|
||||||
|
@ -2670,6 +2894,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen):
|
||||||
# the /O entry in the encryption dictionary.
|
# the /O entry in the encryption dictionary.
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
# Steps 1-4 of algorithm 3.3
|
# Steps 1-4 of algorithm 3.3
|
||||||
def _alg33_1(password, rev, keylen):
|
def _alg33_1(password, rev, keylen):
|
||||||
# 1. Pad or truncate the owner password string as described in step 1 of
|
# 1. Pad or truncate the owner password string as described in step 1 of
|
||||||
|
@ -2692,6 +2917,7 @@ def _alg33_1(password, rev, keylen):
|
||||||
key = md5_hash[:keylen]
|
key = md5_hash[:keylen]
|
||||||
return key
|
return key
|
||||||
|
|
||||||
|
|
||||||
# Implementation of algorithm 3.4 of the PDF standard security handler,
|
# Implementation of algorithm 3.4 of the PDF standard security handler,
|
||||||
# section 3.5.2 of the PDF 1.6 reference.
|
# section 3.5.2 of the PDF 1.6 reference.
|
||||||
def _alg34(password, owner_entry, p_entry, id1_entry):
|
def _alg34(password, owner_entry, p_entry, id1_entry):
|
||||||
|
@ -2706,6 +2932,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry):
|
||||||
# encryption dictionary.
|
# encryption dictionary.
|
||||||
return U, key
|
return U, key
|
||||||
|
|
||||||
|
|
||||||
# Implementation of algorithm 3.5 of the PDF standard security handler,
|
# Implementation of algorithm 3.5 of the PDF standard security handler,
|
||||||
# section 3.5.2 of the PDF 1.6 reference.
|
# section 3.5.2 of the PDF 1.6 reference.
|
||||||
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
|
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
|
||||||
|
|
|
@ -33,25 +33,35 @@ __author_email__ = "biziqe@mathieu.fenniak.net"
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
# "Str" maintains compatibility with Python 2.x.
|
|
||||||
# The next line is obfuscated like this so 2to3 won't change it.
|
|
||||||
try:
|
try:
|
||||||
import __builtin__ as builtins
|
import __builtin__ as builtins
|
||||||
except ImportError: # Py3
|
except ImportError: # Py3
|
||||||
import builtins
|
import builtins
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info[0] < 3:
|
xrange_fn = getattr(builtins, "xrange", range)
|
||||||
string_type = unicode
|
_basestring = getattr(builtins, "basestring", str)
|
||||||
bytes_type = str
|
|
||||||
int_types = (int, long)
|
|
||||||
else:
|
|
||||||
string_type = str
|
|
||||||
bytes_type = bytes
|
|
||||||
int_types = (int,)
|
|
||||||
|
|
||||||
Xrange = getattr(builtins, "xrange", range)
|
bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
|
||||||
Str = getattr(builtins, "basestring", str)
|
string_type = getattr(builtins, "unicode", str)
|
||||||
|
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
|
||||||
|
|
||||||
|
|
||||||
|
# Make basic type tests more consistent
|
||||||
|
def isString(s):
|
||||||
|
"""Test if arg is a string. Compatible with Python 2 and 3."""
|
||||||
|
return isinstance(s, _basestring)
|
||||||
|
|
||||||
|
|
||||||
|
def isInt(n):
|
||||||
|
"""Test if arg is an int. Compatible with Python 2 and 3."""
|
||||||
|
return isinstance(n, int_types)
|
||||||
|
|
||||||
|
|
||||||
|
def isBytes(b):
|
||||||
|
"""Test if arg is a bytes instance. Compatible with Python 2 and 3."""
|
||||||
|
return isinstance(b, bytes_type)
|
||||||
|
|
||||||
|
|
||||||
#custom implementation of warnings.formatwarning
|
#custom implementation of warnings.formatwarning
|
||||||
|
@ -59,6 +69,7 @@ def formatWarning(message, category, filename, lineno, line=None):
|
||||||
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
|
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
|
||||||
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|
||||||
|
|
||||||
|
|
||||||
def readUntilWhitespace(stream, maxchars=None):
|
def readUntilWhitespace(stream, maxchars=None):
|
||||||
"""
|
"""
|
||||||
Reads non-whitespace characters and returns them.
|
Reads non-whitespace characters and returns them.
|
||||||
|
@ -74,6 +85,7 @@ def readUntilWhitespace(stream, maxchars=None):
|
||||||
break
|
break
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
def readNonWhitespace(stream):
|
def readNonWhitespace(stream):
|
||||||
"""
|
"""
|
||||||
Finds and reads the next non-whitespace character (ignores whitespace).
|
Finds and reads the next non-whitespace character (ignores whitespace).
|
||||||
|
@ -83,6 +95,7 @@ def readNonWhitespace(stream):
|
||||||
tok = stream.read(1)
|
tok = stream.read(1)
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
|
|
||||||
def skipOverWhitespace(stream):
|
def skipOverWhitespace(stream):
|
||||||
"""
|
"""
|
||||||
Similar to readNonWhitespace, but returns a Boolean if more than
|
Similar to readNonWhitespace, but returns a Boolean if more than
|
||||||
|
@ -95,6 +108,7 @@ def skipOverWhitespace(stream):
|
||||||
cnt+=1
|
cnt+=1
|
||||||
return (cnt > 1)
|
return (cnt > 1)
|
||||||
|
|
||||||
|
|
||||||
def skipOverComment(stream):
|
def skipOverComment(stream):
|
||||||
tok = stream.read(1)
|
tok = stream.read(1)
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
|
@ -102,6 +116,7 @@ def skipOverComment(stream):
|
||||||
while tok not in (b_('\n'), b_('\r')):
|
while tok not in (b_('\n'), b_('\r')):
|
||||||
tok = stream.read(1)
|
tok = stream.read(1)
|
||||||
|
|
||||||
|
|
||||||
def readUntilRegex(stream, regex, ignore_eof=False):
|
def readUntilRegex(stream, regex, ignore_eof=False):
|
||||||
"""
|
"""
|
||||||
Reads until the regular expression pattern matched (ignore the match)
|
Reads until the regular expression pattern matched (ignore the match)
|
||||||
|
@ -125,6 +140,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
|
||||||
name += tok
|
name += tok
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
class ConvertFunctionsToVirtualList(object):
|
class ConvertFunctionsToVirtualList(object):
|
||||||
def __init__(self, lengthFunction, getFunction):
|
def __init__(self, lengthFunction, getFunction):
|
||||||
self.lengthFunction = lengthFunction
|
self.lengthFunction = lengthFunction
|
||||||
|
@ -135,10 +151,10 @@ class ConvertFunctionsToVirtualList(object):
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
if isinstance(index, slice):
|
if isinstance(index, slice):
|
||||||
indices = Xrange(*index.indices(len(self)))
|
indices = xrange_fn(*index.indices(len(self)))
|
||||||
cls = type(self)
|
cls = type(self)
|
||||||
return cls(indices.__len__, lambda idx: self[indices[idx]])
|
return cls(indices.__len__, lambda idx: self[indices[idx]])
|
||||||
if not isinstance(index, int_types):
|
if not isInt(index):
|
||||||
raise TypeError("sequence indices must be integers")
|
raise TypeError("sequence indices must be integers")
|
||||||
len_self = len(self)
|
len_self = len(self)
|
||||||
if index < 0:
|
if index < 0:
|
||||||
|
@ -148,6 +164,7 @@ class ConvertFunctionsToVirtualList(object):
|
||||||
raise IndexError("sequence index out of range")
|
raise IndexError("sequence index out of range")
|
||||||
return self.getFunction(index)
|
return self.getFunction(index)
|
||||||
|
|
||||||
|
|
||||||
def RC4_encrypt(key, plaintext):
|
def RC4_encrypt(key, plaintext):
|
||||||
S = [i for i in range(256)]
|
S = [i for i in range(256)]
|
||||||
j = 0
|
j = 0
|
||||||
|
@ -164,12 +181,14 @@ def RC4_encrypt(key, plaintext):
|
||||||
retval += b_(chr(ord_(plaintext[x]) ^ t))
|
retval += b_(chr(ord_(plaintext[x]) ^ t))
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
def matrixMultiply(a, b):
|
def matrixMultiply(a, b):
|
||||||
return [[sum([float(i)*float(j)
|
return [[sum([float(i)*float(j)
|
||||||
for i, j in zip(row, col)]
|
for i, j in zip(row, col)]
|
||||||
) for col in zip(*b)]
|
) for col in zip(*b)]
|
||||||
for row in a]
|
for row in a]
|
||||||
|
|
||||||
|
|
||||||
def markLocation(stream):
|
def markLocation(stream):
|
||||||
"""Creates text file showing current location in context."""
|
"""Creates text file showing current location in context."""
|
||||||
# Mainly for debugging
|
# Mainly for debugging
|
||||||
|
@ -182,18 +201,23 @@ def markLocation(stream):
|
||||||
outputDoc.close()
|
outputDoc.close()
|
||||||
stream.seek(-RADIUS, 1)
|
stream.seek(-RADIUS, 1)
|
||||||
|
|
||||||
|
|
||||||
class PyPdfError(Exception):
|
class PyPdfError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PdfReadError(PyPdfError):
|
class PdfReadError(PyPdfError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PageSizeNotDefinedError(PyPdfError):
|
class PageSizeNotDefinedError(PyPdfError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PdfReadWarning(UserWarning):
|
class PdfReadWarning(UserWarning):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PdfStreamError(PdfReadError):
|
class PdfStreamError(PdfReadError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -203,6 +227,7 @@ if sys.version_info[0] < 3:
|
||||||
return s
|
return s
|
||||||
else:
|
else:
|
||||||
B_CACHE = {}
|
B_CACHE = {}
|
||||||
|
|
||||||
def b_(s):
|
def b_(s):
|
||||||
bc = B_CACHE
|
bc = B_CACHE
|
||||||
if s in bc:
|
if s in bc:
|
||||||
|
@ -214,6 +239,8 @@ else:
|
||||||
if len(s) < 2:
|
if len(s) < 2:
|
||||||
bc[s] = r
|
bc[s] = r
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
||||||
def u_(s):
|
def u_(s):
|
||||||
if sys.version_info[0] < 3:
|
if sys.version_info[0] < 3:
|
||||||
return unicode(s, 'unicode_escape')
|
return unicode(s, 'unicode_escape')
|
||||||
|
@ -230,24 +257,28 @@ def str_(b):
|
||||||
else:
|
else:
|
||||||
return b
|
return b
|
||||||
|
|
||||||
|
|
||||||
def ord_(b):
|
def ord_(b):
|
||||||
if sys.version_info[0] < 3 or type(b) == str:
|
if sys.version_info[0] < 3 or type(b) == str:
|
||||||
return ord(b)
|
return ord(b)
|
||||||
else:
|
else:
|
||||||
return b
|
return b
|
||||||
|
|
||||||
|
|
||||||
def chr_(c):
|
def chr_(c):
|
||||||
if sys.version_info[0] < 3:
|
if sys.version_info[0] < 3:
|
||||||
return c
|
return c
|
||||||
else:
|
else:
|
||||||
return chr(c)
|
return chr(c)
|
||||||
|
|
||||||
|
|
||||||
def barray(b):
|
def barray(b):
|
||||||
if sys.version_info[0] < 3:
|
if sys.version_info[0] < 3:
|
||||||
return b
|
return b
|
||||||
else:
|
else:
|
||||||
return bytearray(b)
|
return bytearray(b)
|
||||||
|
|
||||||
|
|
||||||
def hexencode(b):
|
def hexencode(b):
|
||||||
if sys.version_info[0] < 3:
|
if sys.version_info[0] < 3:
|
||||||
return b.encode('hex')
|
return b.encode('hex')
|
||||||
|
@ -256,6 +287,7 @@ def hexencode(b):
|
||||||
coder = codecs.getencoder('hex_codec')
|
coder = codecs.getencoder('hex_codec')
|
||||||
return coder(b)[0]
|
return coder(b)[0]
|
||||||
|
|
||||||
|
|
||||||
def hexStr(num):
|
def hexStr(num):
|
||||||
return hex(num).replace('L', '')
|
return hex(num).replace('L', '')
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,7 @@ iso8601 = re.compile("""
|
||||||
)?
|
)?
|
||||||
""", re.VERBOSE)
|
""", re.VERBOSE)
|
||||||
|
|
||||||
|
|
||||||
class XmpInformation(PdfObject):
|
class XmpInformation(PdfObject):
|
||||||
"""
|
"""
|
||||||
An object that represents Adobe XMP metadata.
|
An object that represents Adobe XMP metadata.
|
||||||
|
@ -355,5 +356,3 @@ class XmpInformation(PdfObject):
|
||||||
:return: a dictionary of key/value items for custom metadata properties.
|
:return: a dictionary of key/value items for custom metadata properties.
|
||||||
:rtype: dict
|
:rtype: dict
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue