From 8bcf0cfe436aadb0b765ac2705fc50fdab7337f7 Mon Sep 17 00:00:00 2001 From: j Date: Mon, 8 Feb 2016 11:51:34 +0530 Subject: [PATCH] avoid hang in large pdfs --- .../lib/python3.4/site-packages/PyPDF2/pdf.py | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py index 5522e4b..d3d4947 100644 --- a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py +++ b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py @@ -46,6 +46,7 @@ import string import math import struct import sys +import uuid from sys import version_info if version_info < ( 3, 0 ): from cStringIO import StringIO @@ -225,8 +226,22 @@ class PdfFileWriter(object): NameObject("/S"): NameObject("/JavaScript"), NameObject("/JS"): NameObject("(%s)" % javascript) }) + js_indirect_object = self._addObject(js) + + # We need a name for parameterized javascript in the pdf file, but it can be anything. + js_string_name = str(uuid.uuid4()) + + js_name_tree = DictionaryObject() + js_name_tree.update({ + NameObject("/JavaScript"): DictionaryObject({ + NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object]) + }) + }) + self._addObject(js_name_tree) + self._root_object.update({ - NameObject("/OpenAction"): self._addObject(js) + NameObject("/OpenAction"): js_indirect_object, + NameObject("/Names"): js_name_tree }) def addAttachment(self, fname, fdata): @@ -1910,26 +1925,21 @@ class PdfFileReader(object): if debug: print((" x:", x, "%x"%ord(x))) if stream.tell() < 2: raise utils.PdfReadError("EOL marker not found") - stream.seek(-2, 1) - if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR - crlf = False - while x == b_('\n') or x == b_('\r'): - if debug: - if ord(x) == 0x0D: print(" x is CR 0D") - elif ord(x) == 0x0A: print(" x is LF 0A") - x = stream.read(1) - if x == b_('\n') or x == b_('\r'): # account for CR+LF - stream.seek(-1, 1) - crlf = True - if stream.tell() < 2: - raise utils.PdfReadError("EOL marker not found") - stream.seek(-2, 1) - stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1 + step = min(stream.tell()-2, 8*1024) + stream.seek(-step, 1) + x = stream.read(step) + if b_('\n') in x or b_('\r') in x: + eol = max(x.rfind(b_('\n')), x.rfind(b_('\r'))) + while eol+1 < len(x) and x[eol+1] in (b_('\n'), b_('\r')): + eol += 1 + line = x[eol+1:] + line + while x[eol] in (b_('\n'), b_('\r')): + eol -= 1 + stream.seek(-step+eol-2, 1) break else: - if debug: print(" x is neither") line = x + line - if debug: print((" RNEL line:", line)) + stream.seek(-step-2, 1) if debug: print("leaving RNEL") return line @@ -2142,7 +2152,7 @@ class PageObject(DictionaryObject): page2Res = res2.get(resource, DictionaryObject()).getObject() renameRes = {} for key in list(page2Res.keys()): - if key in newRes and newRes[key] != page2Res[key]: + if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key): newname = NameObject(key + str(uuid.uuid4())) renameRes[key] = newname newRes[newname] = page2Res[key]