update pypdf2

This commit is contained in:
j 2016-02-08 11:50:06 +05:30
commit 66205d529e
19 changed files with 626 additions and 315 deletions

View file

@ -1,2 +1 @@
__version__ = '1.23'
__version__ = '1.25.1'

View file

@ -40,28 +40,35 @@ if version_info < ( 3, 0 ):
from cStringIO import StringIO
else:
from io import StringIO
import struct
try:
import zlib
def decompress(data):
return zlib.decompress(data)
def compress(data):
return zlib.compress(data)
except ImportError:
# Unable to import zlib. Attempt to use the System.IO.Compression
# library from the .NET framework. (IronPython only)
import System
from System import IO, Collections, Array
def _string_to_bytearr(buf):
retval = Array.CreateInstance(System.Byte, len(buf))
for i in range(len(buf)):
retval[i] = ord(buf[i])
return retval
def _bytearr_to_string(bytes):
retval = ""
for i in range(bytes.Length):
retval += chr(bytes[i])
return retval
def _read_bytes(stream):
ms = IO.MemoryStream()
buf = Array.CreateInstance(System.Byte, 2048)
@ -74,6 +81,7 @@ except ImportError:
retval = ms.ToArray()
ms.Close()
return retval
def decompress(data):
bytes = _string_to_bytearr(data)
ms = IO.MemoryStream()
@ -84,6 +92,7 @@ except ImportError:
retval = _bytearr_to_string(bytes)
gz.Close()
return retval
def compress(data):
bytes = _string_to_bytearr(data)
ms = IO.MemoryStream()
@ -106,7 +115,7 @@ class FlateDecode(object):
predictor = decodeParms.get("/Predictor", 1)
except AttributeError:
pass # usually an array with a null object was read
# predictor 1 == no predictor
if predictor != 1:
columns = decodeParms["/Columns"]
@ -144,6 +153,7 @@ class FlateDecode(object):
return compress(data)
encode = staticmethod(encode)
class ASCIIHexDecode(object):
def decode(data, decodeParms=None):
retval = ""
@ -165,6 +175,7 @@ class ASCIIHexDecode(object):
return retval
decode = staticmethod(decode)
class LZWDecode(object):
"""Taken from:
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
@ -184,7 +195,6 @@ class LZWDecode(object):
def resetDict(self):
self.dictlen=258
self.bitspercode=9
def nextCode(self):
fillbits=self.bitspercode
@ -196,8 +206,8 @@ class LZWDecode(object):
bitsfromhere=8-self.bitpos
if bitsfromhere>fillbits:
bitsfromhere=fillbits
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
(0xff >> (8-bitsfromhere))) <<
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
(0xff >> (8-bitsfromhere))) <<
(fillbits-bitsfromhere))
fillbits -= bitsfromhere
self.bitpos += bitsfromhere
@ -235,70 +245,93 @@ class LZWDecode(object):
baos+=p
self.dict[self.dictlen] = p;
self.dictlen+=1
if (self.dictlen >= (1 << self.bitspercode) - 1 and
if (self.dictlen >= (1 << self.bitspercode) - 1 and
self.bitspercode < 12):
self.bitspercode+=1
return baos
@staticmethod
def decode(data,decodeParams=None):
return LZWDecode.decoder(data).decode()
class ASCII85Decode(object):
def decode(data, decodeParms=None):
retval = ""
group = []
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
continue
#elif c.isspace():
# x += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
x += 1
continue
elif c == "~" and data[x+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
if version_info < ( 3, 0 ):
retval = ""
group = []
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
continue
#elif c.isspace():
# x += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
x += 1
continue
elif c == "~" and data[x+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
else:
break
else:
c = ord(c) - 33
assert c >= 0 and c < 85
group += [ c ]
if len(group) >= 5:
b = group[0] * (85**4) + \
group[1] * (85**3) + \
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
assert b < (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
group = []
x += 1
return retval
else:
if isinstance(data, str):
data = data.encode('ascii')
n = b = 0
out = bytearray()
for c in data:
if ord('!') <= c and c <= ord('u'):
n += 1
b = b*85+(c-33)
if n == 5:
out += struct.pack(b'>L',b)
n = b = 0
elif c == ord('z'):
assert n == 0
out += b'\0\0\0\0'
elif c == ord('~'):
if n:
for _ in range(5-n):
b = b*85+84
out += struct.pack(b'>L',b)[:n-1]
break
else:
c = ord(c) - 33
assert c >= 0 and c < 85
group += [ c ]
if len(group) >= 5:
b = group[0] * (85**4) + \
group[1] * (85**3) + \
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
assert b < (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
group = []
x += 1
return retval
return bytes(out)
decode = staticmethod(decode)
def decodeStreamData(stream):
from .generic import NameObject
filters = stream.get("/Filter", ())
@ -306,22 +339,24 @@ def decodeStreamData(stream):
# we have a single filter instance
filters = (filters,)
data = stream._data
for filterType in filters:
if filterType == "/FlateDecode":
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCIIHexDecode":
data = ASCIIHexDecode.decode(data)
elif filterType == "/LZWDecode":
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCII85Decode":
data = ASCII85Decode.decode(data)
elif filterType == "/Crypt":
decodeParams = stream.get("/DecodeParams", {})
if "/Name" not in decodeParams and "/Type" not in decodeParams:
pass
# If there is no data to decode, we should not try to decode it.
if data:
for filterType in filters:
if filterType == "/FlateDecode" or filterType == "/Fl":
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
data = ASCIIHexDecode.decode(data)
elif filterType == "/LZWDecode" or filterType == "/LZW":
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCII85Decode" or filterType == "/A85":
data = ASCII85Decode.decode(data)
elif filterType == "/Crypt":
decodeParams = stream.get("/DecodeParams", {})
if "/Name" not in decodeParams and "/Type" not in decodeParams:
pass
else:
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
else:
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
else:
# unsupported filter
raise NotImplementedError("unsupported filter %s" % filterType)
# unsupported filter
raise NotImplementedError("unsupported filter %s" % filterType)
return data

View file

@ -43,11 +43,14 @@ from . import filters
from . import utils
import decimal
import codecs
import sys
#import debugging
ObjectPrefix = b_('/<[tf(n%')
NumberSigns = b_('+-')
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
def readObject(stream, pdf):
tok = stream.read(1)
stream.seek(-1, 1) # reset to start
@ -94,6 +97,7 @@ def readObject(stream, pdf):
else:
return NumberObject.readFromStream(stream)
class PdfObject(object):
def getObject(self):
"""Resolves indirect references."""
@ -225,6 +229,7 @@ class FloatObject(decimal.Decimal, PdfObject):
return decimal.Decimal.__new__(cls, utils.str_(value), context)
except:
return decimal.Decimal.__new__(cls, str(value))
def __repr__(self):
if self == self.to_integral():
return str(self.quantize(decimal.Decimal(1)))
@ -244,7 +249,11 @@ class NumberObject(int, PdfObject):
ByteDot = b_(".")
def __new__(cls, value):
return int.__new__(cls, value)
val = int(value)
try:
return int.__new__(cls, val)
except OverflowError:
return int.__new__(cls, 0)
def as_numeric(self):
return int(b_(repr(self)))
@ -253,16 +262,7 @@ class NumberObject(int, PdfObject):
stream.write(b_(repr(self)))
def readFromStream(stream):
num = b_("")
while True:
tok = stream.read(16)
m = NumberObject.NumberPattern.search(tok)
if m is not None:
stream.seek(m.start() - len(tok), 1)
num += tok[:m.start()]
break
num += tok
num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
if num.find(NumberObject.ByteDot) != -1:
return FloatObject(num)
else:
@ -345,13 +345,18 @@ def readStringFromStream(stream):
tok = b_("\b")
elif tok == b_("f"):
tok = b_("\f")
elif tok == b_("c"):
tok = b_("\c")
elif tok == b_("("):
tok = b_("(")
elif tok == b_(")"):
tok = b_(")")
elif tok == b_("/"):
tok = b_("/")
elif tok == b_("\\"):
tok = b_("\\")
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")):
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["),
b_("]"), b_("#"), b_("_"), b_("&"), b_('$')):
# odd/unnecessary escape sequences we have encountered
tok = b_(tok)
elif tok.isdigit():
@ -378,7 +383,7 @@ def readStringFromStream(stream):
# line break was escaped:
tok = b_('')
else:
raise utils.PdfReadError("Unexpected escaped string")
raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
txt += tok
return createStringObject(txt)
@ -456,7 +461,7 @@ class TextStringObject(utils.string_type, PdfObject):
class NameObject(str, PdfObject):
delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
surfix = b_("/")
def writeToStream(self, stream, encryption_key):
@ -468,11 +473,12 @@ class NameObject(str, PdfObject):
name = stream.read(1)
if name != NameObject.surfix:
raise utils.PdfReadError("name read error")
name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
ignore_eof=True)
if debug: print(name)
try:
return NameObject(name.decode('utf-8'))
except UnicodeDecodeError as e:
except (UnicodeEncodeError, UnicodeDecodeError) as e:
# Name objects should represent irregular characters
# with a '#' followed by the symbol's hex number
if not pdf.strict:
@ -630,6 +636,7 @@ class DictionaryObject(dict, PdfObject):
return retval
readFromStream = staticmethod(readFromStream)
class TreeObject(DictionaryObject):
def __init__(self):
DictionaryObject.__init__(self)
@ -726,7 +733,6 @@ class TreeObject(DictionaryObject):
found = True
break
prevRef = curRef
prev = cur
if NameObject('/Next') in cur:
@ -938,6 +944,7 @@ class RectangleObject(ArrayObject):
in (x,y) form.
"""
class Field(TreeObject):
"""
A class representing a field dictionary. This class is accessed through
@ -1009,6 +1016,7 @@ class Field(TreeObject):
See Section 8.5.2 of the PDF 1.7 reference.
"""
class Destination(TreeObject):
"""
A class representing a destination within a PDF file.
@ -1157,6 +1165,7 @@ def encode_pdfdocencoding(unicode_string):
"does not exist in translation table")
return retval
def decode_pdfdocencoding(byte_array):
retval = u_('')
for b in byte_array:
@ -1211,4 +1220,3 @@ for i in range(256):
continue
assert char not in _pdfDocEncoding_rev
_pdfDocEncoding_rev[char] = i

View file

@ -28,7 +28,7 @@
# POSSIBILITY OF SUCH DAMAGE.
from .generic import *
from .utils import string_type
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
@ -40,6 +40,7 @@ else:
from io import FileIO as file
StreamIO = BytesIO
class _MergedPage(object):
"""
_MergedPage is used internally by PdfFileMerger to collect necessary
@ -50,13 +51,14 @@ class _MergedPage(object):
self.pagedata = pagedata
self.out_pagedata = None
self.id = id
class PdfFileMerger(object):
"""
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
into a single PDF. It can concatenate, slice, insert, or any combination
of the above.
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
and :meth:`write()<write>` for usage information.
@ -64,7 +66,7 @@ class PdfFileMerger(object):
problems and also causes some correctable problems to be fatal.
Defaults to ``True``.
"""
def __init__(self, strict=True):
self.inputs = []
self.pages = []
@ -73,7 +75,7 @@ class PdfFileMerger(object):
self.named_dests = []
self.id_count = 0
self.strict = strict
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
"""
Merges the pages from the given file into the output file at the
@ -85,29 +87,30 @@ class PdfFileMerger(object):
:param fileobj: A File Object or an object that supports the standard read
and seek methods similar to a File Object. Could also be a
string representing a path to a PDF file.
:param str bookmark: Optionally, you may specify a bookmark to be applied at
the beginning of the included file by supplying the text of the bookmark.
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
to merge only the specified range of pages from the source
document into the output document.
:param bool import_bookmarks: You may prevent the source document's bookmarks
from being imported by specifying this as ``False``.
"""
# This parameter is passed to self.inputs.append and means
# that the stream used was created in this method.
my_file = False
# If the fileobj parameter is a string, assume it is a path
# and create a file object at that location. If it is a file,
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
# it is a PdfFileReader, copy that reader's stream into a
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
# it is a PdfFileReader, copy that reader's stream into a
# BytesIO (or StreamIO) stream.
# If fileobj is none of the above types, it is not modified
if type(fileobj) == string_type:
decryption_key = None
if isString(fileobj):
fileobj = file(fileobj, 'rb')
my_file = True
elif isinstance(fileobj, file):
@ -116,17 +119,21 @@ class PdfFileMerger(object):
fileobj = StreamIO(filecontent)
my_file = True
elif isinstance(fileobj, PdfFileReader):
orig_tell = fileobj.stream.tell()
orig_tell = fileobj.stream.tell()
fileobj.stream.seek(0)
filecontent = StreamIO(fileobj.stream.read())
fileobj.stream.seek(orig_tell) # reset the stream to its original location
fileobj = filecontent
if hasattr(fileobj, '_decryption_key'):
decryption_key = fileobj._decryption_key
my_file = True
# Create a new PdfFileReader instance using the stream
# (either file or BytesIO or StringIO) created above
pdfr = PdfFileReader(fileobj, strict=self.strict)
if decryption_key is not None:
pdfr._decryption_key = decryption_key
# Find the range of pages to merge.
if pages == None:
pages = (0, pdfr.getNumPages())
@ -134,47 +141,45 @@ class PdfFileMerger(object):
pages = pages.indices(pdfr.getNumPages())
elif not isinstance(pages, tuple):
raise TypeError('"pages" must be a tuple of (start, stop[, step])')
srcpages = []
if bookmark:
bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
outline = []
if import_bookmarks:
outline = pdfr.getOutlines()
outline = self._trim_outline(pdfr, outline, pages)
if bookmark:
self.bookmarks += [bookmark, outline]
else:
self.bookmarks += outline
dests = pdfr.namedDestinations
dests = self._trim_dests(pdfr, dests, pages)
self.named_dests += dests
# Gather all the pages that are going to be merged
for i in range(*pages):
pg = pdfr.getPage(i)
id = self.id_count
self.id_count += 1
mp = _MergedPage(pg, pdfr, id)
srcpages.append(mp)
self._associate_dests_to_pages(srcpages)
self._associate_bookmarks_to_pages(srcpages)
# Slice to insert the pages at the specified position
self.pages[position:position] = srcpages
# Keep track of our input files so we can close them later
self.inputs.append((fileobj, pdfr, my_file))
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
"""
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
@ -183,7 +188,7 @@ class PdfFileMerger(object):
:param fileobj: A File Object or an object that supports the standard read
and seek methods similar to a File Object. Could also be a
string representing a path to a PDF file.
:param str bookmark: Optionally, you may specify a bookmark to be applied at
the beginning of the included file by supplying the text of the bookmark.
@ -194,10 +199,9 @@ class PdfFileMerger(object):
:param bool import_bookmarks: You may prevent the source document's bookmarks
from being imported by specifying this as ``False``.
"""
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
def write(self, fileobj):
"""
Writes all data that has been merged to the given output file.
@ -206,11 +210,10 @@ class PdfFileMerger(object):
file-like object.
"""
my_file = False
if type(fileobj) in (str, str):
if isString(fileobj):
fileobj = file(fileobj, 'wb')
my_file = True
# Add pages to the PdfFileWriter
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
for page in self.pages:
@ -222,15 +225,13 @@ class PdfFileMerger(object):
# Once all pages are added, create bookmarks to point at those pages
self._write_dests()
self._write_bookmarks()
# Write the output to the file
# Write the output to the file
self.output.write(fileobj)
if my_file:
fileobj.close()
def close(self):
"""
Shuts all file descriptors (input and output) and clears all memory
@ -240,7 +241,7 @@ class PdfFileMerger(object):
for fo, pdfr, mine in self.inputs:
if mine:
fo.close()
self.inputs = []
self.output = None
@ -253,7 +254,7 @@ class PdfFileMerger(object):
Example: ``{u'/Title': u'My title'}``
"""
self.output.addMetadata(infos)
def setPageLayout(self, layout):
"""
Set the page layout
@ -289,7 +290,7 @@ class PdfFileMerger(object):
def _trim_dests(self, pdf, dests, pages):
"""
Removes any named destinations that are not a part of the specified
Removes any named destinations that are not a part of the specified
page set.
"""
new_dests = []
@ -298,14 +299,14 @@ class PdfFileMerger(object):
for j in range(*pages):
if pdf.getPage(j).getObject() == o['/Page'].getObject():
o[NameObject('/Page')] = o['/Page'].getObject()
assert str(k) == str(o['/Title'])
assert str_(k) == str_(o['/Title'])
new_dests.append(o)
break
return new_dests
def _trim_outline(self, pdf, outline, pages):
"""
Removes any outline/bookmark entries that are not a part of the
Removes any outline/bookmark entries that are not a part of the
specified page set.
"""
new_outline = []
@ -326,10 +327,10 @@ class PdfFileMerger(object):
prev_header_added = True
break
return new_outline
def _write_dests(self):
dests = self.named_dests
for v in dests:
pageno = None
pdf = None
@ -342,19 +343,18 @@ class PdfFileMerger(object):
break
if pageno != None:
self.output.addNamedDestinationObject(v)
def _write_bookmarks(self, bookmarks=None, parent=None):
if bookmarks == None:
bookmarks = self.bookmarks
last_added = None
for b in bookmarks:
if isinstance(b, list):
self._write_bookmarks(b, last_added)
continue
pageno = None
pdf = None
if '/Page' in b:
@ -410,31 +410,31 @@ class PdfFileMerger(object):
del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
pageno = i
pdf = p.src
break
if pageno != None:
del b['/Page'], b['/Type']
last_added = self.output.addBookmarkDict(b, parent)
last_added = self.output.addBookmarkDict(b, parent)
def _associate_dests_to_pages(self, pages):
for nd in self.named_dests:
pageno = None
np = nd['/Page']
if isinstance(np, NumberObject):
continue
for p in pages:
if np.getObject() == p.pagedata.getObject():
pageno = p.id
if pageno != None:
nd[NameObject('/Page')] = NumberObject(pageno)
else:
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
if bookmarks == None:
bookmarks = self.bookmarks
@ -443,35 +443,35 @@ class PdfFileMerger(object):
if isinstance(b, list):
self._associate_bookmarks_to_pages(pages, b)
continue
pageno = None
bp = b['/Page']
if isinstance(bp, NumberObject):
continue
for p in pages:
if bp.getObject() == p.pagedata.getObject():
pageno = p.id
if pageno != None:
b[NameObject('/Page')] = NumberObject(pageno)
else:
raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
def findBookmark(self, bookmark, root=None):
if root == None:
root = self.bookmarks
for i, b in enumerate(root):
if isinstance(b, list):
res = self.findBookmark(bookmark, b)
if res:
return [i] + res
elif b == bookmark or b['/Title'] == bookmark:
return [i]
return None
if root == None:
root = self.bookmarks
for i, b in enumerate(root):
if isinstance(b, list):
res = self.findBookmark(bookmark, b)
if res:
return [i] + res
elif b == bookmark or b['/Title'] == bookmark:
return [i]
return None
def addBookmark(self, title, pagenum, parent=None):
"""
@ -483,28 +483,27 @@ class PdfFileMerger(object):
bookmarks.
"""
if parent == None:
iloc = [len(self.bookmarks)-1]
iloc = [len(self.bookmarks)-1]
elif isinstance(parent, list):
iloc = parent
iloc = parent
else:
iloc = self.findBookmark(parent)
iloc = self.findBookmark(parent)
dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
if parent == None:
self.bookmarks.append(dest)
self.bookmarks.append(dest)
else:
bmparent = self.bookmarks
for i in iloc[:-1]:
bmparent = bmparent[i]
npos = iloc[-1]+1
if npos < len(bmparent) and isinstance(bmparent[npos], list):
bmparent[npos].append(dest)
else:
bmparent.insert(npos, [dest])
bmparent = self.bookmarks
for i in iloc[:-1]:
bmparent = bmparent[i]
npos = iloc[-1]+1
if npos < len(bmparent) and isinstance(bmparent[npos], list):
bmparent[npos].append(dest)
else:
bmparent.insert(npos, [dest])
return dest
def addNamedDestination(self, title, pagenum):
"""
Add a destination to the output.
@ -512,7 +511,7 @@ class PdfFileMerger(object):
:param str title: Title to use
:param int pagenum: Page number this destination points at.
"""
dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
self.named_dests.append(dest)
@ -523,12 +522,12 @@ class OutlinesObject(list):
self.tree = tree
self.pdf = pdf
self.parent = parent
def remove(self, index):
obj = self[index]
del self[index]
self.tree.removeChild(obj)
def add(self, title, pagenum):
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
action = DictionaryObject()
@ -547,7 +546,7 @@ class OutlinesObject(list):
self.pdf._addObject(bookmark)
self.tree.addChild(bookmark)
def removeAll(self):
for child in [x for x in self.tree.children()]:
self.tree.removeChild(child)

View file

@ -8,7 +8,7 @@ see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""
import re
from .utils import Str
from .utils import isString
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
@ -32,11 +32,11 @@ PAGE_RANGE_HELP = """Remember, page indices start with zero.
::-1 all pages in reverse order.
"""
class PageRange(object):
"""
"""
A slice-like representation of a range of page indices,
i.e. page numbers, only starting at zero.
i.e. page numbers, only starting at zero.
The syntax is like what you would put between brackets [ ].
The slice is one of the few Python types that can't be subclassed,
but this class converts to and from slices, and allows similar use.
@ -46,7 +46,7 @@ class PageRange(object):
o str() and repr() allow printing.
o indices(n) is like slice.indices(n).
"""
def __init__(self, arg):
"""
Initialize with either a slice -- giving the equivalent page range,
@ -67,8 +67,8 @@ class PageRange(object):
if isinstance(arg, PageRange):
self._slice = arg.to_slice()
return
m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
if not m:
raise ParseError(arg)
elif m.group(2):
@ -77,25 +77,25 @@ class PageRange(object):
stop = start + 1 if start != -1 else None
self._slice = slice(start, stop)
else:
self._slice = slice(*[int(g) if g else None
self._slice = slice(*[int(g) if g else None
for g in m.group(4, 6, 8)])
# Just formatting this when there is __doc__ for __init__
if __init__.__doc__:
__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
@staticmethod
def valid(input):
""" True if input is a valid initializer for a PageRange. """
return isinstance(input, slice) or \
isinstance(input, PageRange) or \
(isinstance(input, Str)
(isString(input)
and bool(re.match(PAGE_RANGE_RE, input)))
def to_slice(self):
""" Return the slice equivalent of this page range. """
return self._slice
def __str__(self):
""" A string like "1:2:3". """
s = self._slice
@ -127,7 +127,7 @@ def parse_filename_page_ranges(args):
"""
Given a list of filenames and page ranges, return a list of
(filename, page_range) pairs.
First arg must be a filename; other ags are filenames, page-range
First arg must be a filename; other ags are filenames, page-range
expressions, slice objects, or PageRange objects.
A filename not followed by a page range indicates all pages of the file.
"""
@ -146,7 +146,7 @@ def parse_filename_page_ranges(args):
# New filename or end of list--do all of the previous file?
if pdf_filename and not did_page_range:
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
pdf_filename = arg
did_page_range = False
return pairs

View file

@ -63,7 +63,7 @@ import warnings
import codecs
from .generic import *
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning
from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning
if version_info < ( 2, 4 ):
from sets import ImmutableSet as frozenset
@ -74,6 +74,7 @@ else:
from hashlib import md5
import uuid
class PdfFileWriter(object):
"""
This class supports writing PDF files out, given pages produced by another
@ -228,6 +229,157 @@ class PdfFileWriter(object):
NameObject("/OpenAction"): self._addObject(js)
})
def addAttachment(self, fname, fdata):
    """
    Embed a file inside the PDF.

    Three linked structures are built: the embedded file stream holding
    ``fdata``, a /Filespec dictionary that points at that stream, and an
    /EmbeddedFiles name entry that is stored under /Names on the root
    object.

    :param str fname: The filename to display.
    :param str fdata: The data in the file.

    Reference:
    https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Section 7.11.3
    """
    # 1) The embedded file stream itself. Sample of the intended output:
    #
    #   8 0 obj
    #   <<
    #    /Length 12
    #    /Type /EmbeddedFile
    #   >>
    #   stream
    #   Hello world!
    #   endstream
    #   endobj
    embedded_stream = DecodedStreamObject()
    embedded_stream.setData(fdata)
    embedded_stream.update({
        NameObject("/Type"): NameObject("/EmbeddedFile")
    })

    # 2) The Filespec entry that wraps the stream. Sample:
    #
    #   7 0 obj
    #   <<
    #    /Type /Filespec
    #    /F (hello.txt)
    #    /EF << /F 8 0 R >>
    #   >>
    ef_dict = DictionaryObject()
    ef_dict.update({NameObject("/F"): embedded_stream})

    file_spec = DictionaryObject()
    file_spec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
        NameObject("/EF"): ef_dict
    })

    # 3) The root needs a reference to the Filespec. Sample:
    #
    #   1 0 obj
    #   <<
    #    /Type /Catalog
    #    /Outlines 2 0 R
    #    /Pages 3 0 R
    #    /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
    #   >>
    #   endobj
    name_entries = DictionaryObject()
    name_entries.update({
        NameObject("/Names"): ArrayObject([createStringObject(fname), file_spec])
    })

    names_dict = DictionaryObject()
    names_dict.update({
        NameObject("/EmbeddedFiles"): name_entries
    })

    # NOTE(review): this stores a fresh /Names value on the root object, so
    # it looks like any /Names entry already present (including one from an
    # earlier addAttachment call) is replaced -- confirm whether callers
    # rely on adding several attachments.
    self._root_object.update({
        NameObject("/Names"): names_dict
    })
def appendPagesFromReader(self, reader, after_page_append=None):
    """
    Append every page of ``reader`` to this writer, in order.

    Each page is fetched with ``reader.getPage()``, added with
    ``self.addPage()``, and then looked up again on the writer so the
    optional callback receives the writer's page rather than the
    reader's.

    :param reader: a PdfFileReader object whose pages are copied into
        this writer.
    :param after_page_append: optional callable invoked once per appended
        page as ``after_page_append(writer_page)``. Values that are not
        callable (including ``None``) are ignored.
    """
    page_total = reader.getNumPages()
    # Pages already in the writer: the k-th copied page ends up at index
    # first_new_index + k.
    first_new_index = self.getNumPages()
    notify = callable(after_page_append)

    for offset in range(page_total):
        self.addPage(reader.getPage(offset))
        appended = self.getPage(first_new_index + offset)
        if notify:
            after_page_append(appended)
def updatePageFormFieldValues(self, page, fields):
    '''
    Update the form field values for a given page from a fields dictionary.

    For every annotation on the page whose field name (/T) equals a key of
    ``fields``, the annotation's value (/V) is set to the corresponding
    text. Pages that carry no /Annots entry are left untouched.

    :param page: Page reference from PDF writer where the annotations
        and field data will be updated.
    :param fields: a Python dictionary of field names (/T) and text
        values (/V)
    '''
    # A page without annotations has no fields to fill in; indexing
    # page['/Annots'] unconditionally would raise KeyError here.
    if '/Annots' not in page:
        return

    # Look the annotation array up once instead of on every iteration.
    annotations = page['/Annots']
    for annot_ref in annotations:
        writer_annot = annot_ref.getObject()
        for field in fields:
            if writer_annot.get('/T') == field:
                writer_annot.update({
                    NameObject("/V"): TextStringObject(fields[field])
                })
def cloneReaderDocumentRoot(self, reader):
    '''
    Make the reader's document root this writer's root object.

    The value stored is whatever ``reader.trailer['/Root']`` yields; it is
    assigned as-is, not copied.

    :param reader: PdfFileReader whose document root should be used.
    '''
    source_root = reader.trailer['/Root']
    self._root_object = source_root
def cloneDocumentFromReader(self, reader, after_page_append=None):
    '''
    Create a copy (clone) of a document from a PDF file reader.

    This is a two-step convenience wrapper: the reader's document root is
    taken over first (``cloneReaderDocumentRoot``), then all of its pages
    are appended (``appendPagesFromReader``).

    :param reader: PDF file reader instance from which the clone
        should be created.
    :param after_page_append: optional callback forwarded unchanged to
        ``appendPagesFromReader``, which invokes it with a reference to
        each page just appended to the document.
    '''
    # Step 1: document root.
    self.cloneReaderDocumentRoot(reader)
    # Step 2: pages (the callback, if any, is handled by the delegate).
    self.appendPagesFromReader(reader, after_page_append)
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
"""
Encrypt this PDF file with the PDF Standard encryption handler.
@ -516,7 +668,6 @@ class PdfFileWriter(object):
return bookmarkRef
def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
"""
Add a bookmark to this PDF file.
@ -553,7 +704,6 @@ class PdfFileWriter(object):
if parent == None:
parent = outlineRef
bookmark = TreeObject()
bookmark.update({
@ -759,7 +909,7 @@ class PdfFileWriter(object):
else:
borderArr = [NumberObject(0)] * 3
if isinstance(rect, Str):
if isString(rect):
rect = NameObject(rect)
elif isinstance(rect, RectangleObject):
pass
@ -871,6 +1021,7 @@ class PdfFileWriter(object):
"""Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
class PdfFileReader(object):
"""
Initializes a PdfFileReader object. This operation can take some time, as
@ -904,9 +1055,10 @@ class PdfFileReader(object):
self.flattenedPages = None
self.resolvedObjects = {}
self.xrefIndex = 0
self._pageId2Num = None # map page IndirectRef number to Page Number
if hasattr(stream, 'mode') and 'b' not in stream.mode:
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
if type(stream) in (string_type, str):
if isString(stream):
fileobj = open(stream, 'rb')
stream = BytesIO(b_(fileobj.read()))
fileobj.close()
@ -973,6 +1125,7 @@ class PdfFileReader(object):
if self.isEncrypted:
try:
self._override_encryption = True
self.decrypt('')
return self.trailer["/Root"]["/Pages"]["/Count"]
except:
raise utils.PdfReadError("File has not been decrypted")
@ -1160,7 +1313,14 @@ class PdfFileReader(object):
# get the outline dictionary and named destinations
if "/Outlines" in catalog:
lines = catalog["/Outlines"]
try:
lines = catalog["/Outlines"]
except utils.PdfReadError:
# this occurs if the /Outlines object reference is incorrect
# for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
# so continue to load the file without the Bookmarks
return outlines
if "/First" in lines:
node = lines["/First"]
self._namedDests = self.getNamedDestinations()
@ -1187,6 +1347,49 @@ class PdfFileReader(object):
return outlines
def _getPageNumberByIndirect(self, indirectRef):
    """
    Map an indirect page reference to its zero-based page number.

    The id-to-number table (``self._pageId2Num``) is built from
    ``self.pages`` on the first call and reused on later calls.

    :param indirectRef: either the object id of the page as an ``int``, or
        an object exposing that id through an ``idnum`` attribute.
    :return: the index of the matching page in ``self.pages``, or -1 if the
        reference is ``None``, has no ``idnum``, or matches no page.
    :rtype: int
    """
    if self._pageId2Num is None:
        # Assign only once the whole table is built, so a failure while
        # enumerating pages does not leave a partial cache behind.
        self._pageId2Num = dict(
            (page.indirectRef.idnum, number)
            for number, page in enumerate(self.pages)
        )
    if isinstance(indirectRef, int):
        idnum = indirectRef
    else:
        # A reference without ``idnum`` (including None) cannot match any
        # page: report "not found" instead of raising AttributeError.
        idnum = getattr(indirectRef, "idnum", None)
    return self._pageId2Num.get(idnum, -1)
def getPageNumber(self, page):
    """
    Look up the page number of a PageObject.

    The lookup is delegated to ``_getPageNumberByIndirect`` using the
    page's ``indirectRef`` attribute.

    :param PageObject page: The page whose number is wanted. Should be
        an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    return self._getPageNumberByIndirect(page.indirectRef)
def getDestinationPageNumber(self, destination):
    """
    Look up the page number a Destination object points to.

    The lookup is delegated to ``_getPageNumberByIndirect`` using the
    destination's ``page`` attribute.

    :param Destination destination: The destination whose page number is
        wanted. Should be an instance of
        :class:`Destination<PyPDF2.pdf.Destination>`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    return self._getPageNumberByIndirect(destination.page)
def _buildDestination(self, title, array):
page, typ = array[0:2]
array = array[2:]
@ -1210,7 +1413,7 @@ class PdfFileReader(object):
if dest:
if isinstance(dest, ArrayObject):
outline = self._buildDestination(title, dest)
elif isinstance(dest, Str) and dest in self._namedDests:
elif isString(dest) and dest in self._namedDests:
outline = self._namedDests[dest]
outline[NameObject("/Title")] = title
else:
@ -1310,6 +1513,8 @@ class PdfFileReader(object):
assert idx < objStm['/N']
streamData = BytesIO(b_(objStm.getData()))
for i in range(objStm['/N']):
readNonWhitespace(streamData)
streamData.seek(-1, 1)
objnum = NumberObject.readFromStream(streamData)
readNonWhitespace(streamData)
streamData.seek(-1, 1)
@ -1347,7 +1552,6 @@ class PdfFileReader(object):
if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
return NullObject()
def getObject(self, indirectReference):
debug = False
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
@ -1470,7 +1674,7 @@ class PdfFileReader(object):
startxref = int(line)
except ValueError:
# 'startxref' may be on the same line as the location
if not line.startswith("startxref"):
if not line.startswith(b_("startxref")):
raise utils.PdfReadError("startxref not found")
startxref = int(line[9:].strip())
warnings.warn("startxref on same line as offset")
@ -1580,6 +1784,7 @@ class PdfFileReader(object):
assert len(entrySizes) >= 3
if self.strict and len(entrySizes) > 3:
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
def getEntry(i):
# Reads the correct number of bytes for each entry. See the
# discussion of the W parameter in PDF spec table 17.
@ -1664,8 +1869,7 @@ class PdfFileReader(object):
if found:
continue
# no xref table found at specified location
assert False
break
raise utils.PdfReadError("Could not find xref table at specified location")
#if not zero-indexed, verify that the table is correct; change it if necessary
if self.xrefIndex and not self.strict:
loc = stream.tell()
@ -1683,7 +1887,6 @@ class PdfFileReader(object):
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
stream.seek(loc, 0) #return to where it was
def _zeroXref(self, generation):
self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
@ -1700,8 +1903,13 @@ class PdfFileReader(object):
if debug: print(">>readNextEndLine")
line = b_("")
while True:
# Prevent infinite loops in malformed PDFs
if stream.tell() == 0:
raise utils.PdfReadError("Could not read malformed PDF file")
x = stream.read(1)
if debug: print((" x:", x, "%x"%ord(x)))
if stream.tell() < 2:
raise utils.PdfReadError("EOL marker not found")
stream.seek(-2, 1)
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
crlf = False
@ -1713,6 +1921,8 @@ class PdfFileReader(object):
if x == b_('\n') or x == b_('\r'): # account for CR+LF
stream.seek(-1, 1)
crlf = True
if stream.tell() < 2:
raise utils.PdfReadError("EOL marker not found")
stream.seek(-2, 1)
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
break
@ -1827,14 +2037,17 @@ def getRectangle(self, name, defaults):
setRectangle(self, name, retval)
return retval
def setRectangle(self, name, value):
    """
    Store ``value`` in ``self`` under ``name``.

    A ``name`` that is not already a ``NameObject`` is wrapped in one
    before being used as the key.
    """
    key = name if isinstance(name, NameObject) else NameObject(name)
    self[key] = value
def deleteRectangle(self, name):
    """Remove the entry stored under ``name`` from ``self``."""
    del self[name]
def createRectangleAccessor(name, fallback):
return \
property(
@ -1843,6 +2056,7 @@ def createRectangleAccessor(name, fallback):
lambda self: deleteRectangle(self, name)
)
class PageObject(DictionaryObject):
"""
This class represents a single page within a PDF file. Typically this
@ -2374,6 +2588,7 @@ class PageObject(DictionaryObject):
for i in operands[0]:
if isinstance(i, TextStringObject):
text += i
text += "\n"
return text
mediaBox = createRectangleAccessor("/MediaBox", ())
@ -2412,6 +2627,7 @@ class PageObject(DictionaryObject):
page's creator.
"""
class ContentStream(DecodedStreamObject):
def __init__(self, stream, pdf):
self.pdf = pdf
@ -2437,25 +2653,25 @@ class ContentStream(DecodedStreamObject):
if peek == b_('') or ord_(peek) == 0:
break
stream.seek(-1, 1)
if peek.isalpha() or peek == "'" or peek == '"':
if peek.isalpha() or peek == b_("'") or peek == b_('"'):
operator = utils.readUntilRegex(stream,
NameObject.delimiterPattern, True)
if operator == "BI":
if operator == b_("BI"):
# begin inline image - a completely different parsing
# mechanism is required, of course... thanks buddy...
assert operands == []
ii = self._readInlineImage(stream)
self.operations.append((ii, "INLINE IMAGE"))
self.operations.append((ii, b_("INLINE IMAGE")))
else:
self.operations.append((operands, operator))
operands = []
elif peek == '%':
elif peek == b_('%'):
# If we encounter a comment in the content stream, we have to
# handle it here. Typically, readObject will handle
# encountering a comment -- but readObject assumes that
# following the comment must be the object we're trying to
# read. In this case, it could be an operator instead.
while peek not in ('\r', '\n'):
while peek not in (b_('\r'), b_('\n')):
peek = stream.read(1)
else:
operands.append(readObject(stream, None))
@ -2467,7 +2683,7 @@ class ContentStream(DecodedStreamObject):
while True:
tok = readNonWhitespace(stream)
stream.seek(-1, 1)
if tok == "I":
if tok == b_("I"):
# "ID" - begin of image data
break
key = readObject(stream, self.pdf)
@ -2477,28 +2693,32 @@ class ContentStream(DecodedStreamObject):
settings[key] = value
# left at beginning of ID
tmp = stream.read(3)
assert tmp[:2] == "ID"
data = ""
assert tmp[:2] == b_("ID")
data = b_("")
while True:
# Read the inline image, while checking for EI (End Image) operator.
tok = stream.read(1)
if tok == "E":
if tok == b_("E"):
# Check for End Image
next1 = stream.read(1)
if next1 == "I":
next2 = readNonWhitespace(stream)
if next2 == 'Q':
tok2 = stream.read(1)
if tok2 == b_("I"):
# Sometimes that data will contain EI, so check for the Q operator.
tok3 = stream.read(1)
info = tok + tok2
while tok3 in utils.WHITESPACES:
info += tok3
tok3 = stream.read(1)
if tok3 == b_("Q"):
stream.seek(-1, 1)
break
else:
stream.seek(-2, 1)
data += tok
stream.seek(-1,1)
data += info
else:
stream.seek(-1, 1)
data += tok
else:
data += tok
x = readNonWhitespace(stream)
stream.seek(-1, 1)
return {"settings": settings, "data": data}
def _getData(self):
@ -2525,6 +2745,7 @@ class ContentStream(DecodedStreamObject):
_data = property(_getData, _setData)
class DocumentInformation(DictionaryObject):
"""
A class representing the basic document metadata provided in a PDF File.
@ -2588,6 +2809,7 @@ class DocumentInformation(DictionaryObject):
producer_raw = property(lambda self: self.get("/Producer"))
"""The "raw" version of producer; can return a ``ByteStringObject``."""
def convertToInt(d, size):
if size > 8:
raise utils.PdfReadError("invalid size in convertToInt")
@ -2600,6 +2822,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
b_('\xa9\xfe\x64\x53\x69\x7a')
# Implementation of algorithm 3.2 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
@ -2643,6 +2866,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
# entry.
return md5_hash[:keylen]
# Implementation of algorithm 3.3 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg33(owner_pwd, user_pwd, rev, keylen):
@ -2670,6 +2894,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen):
# the /O entry in the encryption dictionary.
return val
# Steps 1-4 of algorithm 3.3
def _alg33_1(password, rev, keylen):
# 1. Pad or truncate the owner password string as described in step 1 of
@ -2692,6 +2917,7 @@ def _alg33_1(password, rev, keylen):
key = md5_hash[:keylen]
return key
# Implementation of algorithm 3.4 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg34(password, owner_entry, p_entry, id1_entry):
@ -2706,6 +2932,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry):
# encryption dictionary.
return U, key
# Implementation of algorithm 3.5 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):

View file

@ -33,25 +33,35 @@ __author_email__ = "biziqe@mathieu.fenniak.net"
import sys

try:
    import __builtin__ as builtins
except ImportError:  # Py3
    import builtins

# Version-specific builtins are looked up on the ``builtins`` module so this
# file never references a bare name that exists on only one major version.
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)

# Older spellings of the two names above, kept so existing references to
# ``Xrange`` and ``Str`` keep working.
Xrange = xrange_fn
Str = _basestring

bytes_type = type(bytes())  # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
if sys.version_info[0] < 3:
    int_types = (int, builtins.long)
else:
    int_types = (int,)


# Make basic type tests more consistent
def isString(s):
    """Test if arg is a string. Compatible with Python 2 and 3."""
    return isinstance(s, _basestring)


def isInt(n):
    """Test if arg is an int. Compatible with Python 2 and 3."""
    return isinstance(n, int_types)


def isBytes(b):
    """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
    return isinstance(b, bytes_type)
#custom implementation of warnings.formatwarning
@ -59,6 +69,7 @@ def formatWarning(message, category, filename, lineno, line=None):
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
def readUntilWhitespace(stream, maxchars=None):
"""
Reads non-whitespace characters and returns them.
@ -74,6 +85,7 @@ def readUntilWhitespace(stream, maxchars=None):
break
return txt
def readNonWhitespace(stream):
"""
Finds and reads the next non-whitespace character (ignores whitespace).
@ -83,6 +95,7 @@ def readNonWhitespace(stream):
tok = stream.read(1)
return tok
def skipOverWhitespace(stream):
"""
Similar to readNonWhitespace, but returns a Boolean if more than
@ -95,6 +108,7 @@ def skipOverWhitespace(stream):
cnt+=1
return (cnt > 1)
def skipOverComment(stream):
tok = stream.read(1)
stream.seek(-1, 1)
@ -102,6 +116,7 @@ def skipOverComment(stream):
while tok not in (b_('\n'), b_('\r')):
tok = stream.read(1)
def readUntilRegex(stream, regex, ignore_eof=False):
"""
Reads until the regular expression pattern matched (ignore the match)
@ -125,6 +140,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
name += tok
return name
class ConvertFunctionsToVirtualList(object):
def __init__(self, lengthFunction, getFunction):
self.lengthFunction = lengthFunction
@ -135,10 +151,10 @@ class ConvertFunctionsToVirtualList(object):
def __getitem__(self, index):
if isinstance(index, slice):
indices = Xrange(*index.indices(len(self)))
indices = xrange_fn(*index.indices(len(self)))
cls = type(self)
return cls(indices.__len__, lambda idx: self[indices[idx]])
if not isinstance(index, int_types):
if not isInt(index):
raise TypeError("sequence indices must be integers")
len_self = len(self)
if index < 0:
@ -148,6 +164,7 @@ class ConvertFunctionsToVirtualList(object):
raise IndexError("sequence index out of range")
return self.getFunction(index)
def RC4_encrypt(key, plaintext):
S = [i for i in range(256)]
j = 0
@ -164,12 +181,14 @@ def RC4_encrypt(key, plaintext):
retval += b_(chr(ord_(plaintext[x]) ^ t))
return retval
def matrixMultiply(a, b):
    """
    Multiply two matrices given as sequences of rows.

    Every entry is converted with ``float()`` before multiplying. Each
    result cell is the sum of the element-wise products of a row of ``a``
    and a column of ``b``; ``zip`` pairs the elements, so a longer row or
    column is silently truncated to the shorter one.

    :return: the product as a list of row lists.
    """
    product = []
    for row in a:
        out_row = []
        for col in zip(*b):
            cell = sum(float(x) * float(y) for x, y in zip(row, col))
            out_row.append(cell)
        product.append(out_row)
    return product
def markLocation(stream):
"""Creates text file showing current location in context."""
# Mainly for debugging
@ -182,18 +201,23 @@ def markLocation(stream):
outputDoc.close()
stream.seek(-RADIUS, 1)
class PyPdfError(Exception):
    """Common base class for the error types defined in this module."""


class PdfReadError(PyPdfError):
    """Raised when PDF data cannot be read or parsed."""


class PageSizeNotDefinedError(PyPdfError):
    """Error for a page whose size is not defined (going by the name;
    confirm against the raising code)."""


class PdfReadWarning(UserWarning):
    """Warning category for non-fatal problems noticed while reading a PDF."""


class PdfStreamError(PdfReadError):
    """Specialised PdfReadError; presumably for stream-level failures
    (confirm against the raising code)."""
@ -203,6 +227,7 @@ if sys.version_info[0] < 3:
return s
else:
B_CACHE = {}
def b_(s):
bc = B_CACHE
if s in bc:
@ -214,6 +239,8 @@ else:
if len(s) < 2:
bc[s] = r
return r
def u_(s):
if sys.version_info[0] < 3:
return unicode(s, 'unicode_escape')
@ -230,24 +257,28 @@ def str_(b):
else:
return b
def ord_(b):
    """
    Return the ordinal of ``b`` where one has to be computed.

    On Python 2, or when ``b`` is exactly a ``str``, the result is
    ``ord(b)``. Any other value is handed back unchanged.
    """
    # The exact-type test is deliberate: instances of str subclasses are
    # returned as-is, not passed to ord().
    needs_ord = sys.version_info[0] < 3 or type(b) is str
    return ord(b) if needs_ord else b
def chr_(c):
    """
    Return ``chr(c)`` on Python 3; on Python 2 return ``c`` unchanged.
    """
    if sys.version_info[0] >= 3:
        return chr(c)
    return c
def barray(b):
    """
    Return ``bytearray(b)`` on Python 3; on Python 2 return ``b`` unchanged.
    """
    on_py2 = sys.version_info[0] < 3
    return b if on_py2 else bytearray(b)
def hexencode(b):
if sys.version_info[0] < 3:
return b.encode('hex')
@ -256,6 +287,7 @@ def hexencode(b):
coder = codecs.getencoder('hex_codec')
return coder(b)[0]
def hexStr(num):
    """
    Return ``hex(num)`` with every ``'L'`` character removed.

    Python 2 appends ``'L'`` to the hex form of a long; removing it gives
    the same text on both major versions.
    """
    text = hex(num)
    return text.replace('L', '')

View file

@ -50,6 +50,7 @@ iso8601 = re.compile("""
)?
""", re.VERBOSE)
class XmpInformation(PdfObject):
"""
An object that represents Adobe XMP metadata.
@ -355,5 +356,3 @@ class XmpInformation(PdfObject):
:return: a dictionary of key/value items for custom metadata properties.
:rtype: dict
"""