Avoid hang in large PDFs

This commit is contained in:
j 2016-02-08 11:51:34 +05:30
parent 66205d529e
commit 8bcf0cfe43

View file

@ -46,6 +46,7 @@ import string
import math import math
import struct import struct
import sys import sys
import uuid
from sys import version_info from sys import version_info
if version_info < ( 3, 0 ): if version_info < ( 3, 0 ):
from cStringIO import StringIO from cStringIO import StringIO
@ -225,8 +226,22 @@ class PdfFileWriter(object):
NameObject("/S"): NameObject("/JavaScript"), NameObject("/S"): NameObject("/JavaScript"),
NameObject("/JS"): NameObject("(%s)" % javascript) NameObject("/JS"): NameObject("(%s)" % javascript)
}) })
js_indirect_object = self._addObject(js)
# We need a name for the parameterized JavaScript in the PDF file, but it can be anything.
js_string_name = str(uuid.uuid4())
js_name_tree = DictionaryObject()
js_name_tree.update({
NameObject("/JavaScript"): DictionaryObject({
NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object])
})
})
self._addObject(js_name_tree)
self._root_object.update({ self._root_object.update({
NameObject("/OpenAction"): self._addObject(js) NameObject("/OpenAction"): js_indirect_object,
NameObject("/Names"): js_name_tree
}) })
def addAttachment(self, fname, fdata): def addAttachment(self, fname, fdata):
@ -1910,26 +1925,21 @@ class PdfFileReader(object):
if debug: print((" x:", x, "%x"%ord(x))) if debug: print((" x:", x, "%x"%ord(x)))
if stream.tell() < 2: if stream.tell() < 2:
raise utils.PdfReadError("EOL marker not found") raise utils.PdfReadError("EOL marker not found")
stream.seek(-2, 1) step = min(stream.tell()-2, 8*1024)
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR stream.seek(-step, 1)
crlf = False x = stream.read(step)
while x == b_('\n') or x == b_('\r'): if b_('\n') in x or b_('\r') in x:
if debug: eol = max(x.rfind(b_('\n')), x.rfind(b_('\r')))
if ord(x) == 0x0D: print(" x is CR 0D") while eol+1 < len(x) and x[eol+1] in (b_('\n'), b_('\r')):
elif ord(x) == 0x0A: print(" x is LF 0A") eol += 1
x = stream.read(1) line = x[eol+1:] + line
if x == b_('\n') or x == b_('\r'): # account for CR+LF while x[eol] in (b_('\n'), b_('\r')):
stream.seek(-1, 1) eol -= 1
crlf = True stream.seek(-step+eol-2, 1)
if stream.tell() < 2:
raise utils.PdfReadError("EOL marker not found")
stream.seek(-2, 1)
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
break break
else: else:
if debug: print(" x is neither")
line = x + line line = x + line
if debug: print((" RNEL line:", line)) stream.seek(-step-2, 1)
if debug: print("leaving RNEL") if debug: print("leaving RNEL")
return line return line
@ -2142,7 +2152,7 @@ class PageObject(DictionaryObject):
page2Res = res2.get(resource, DictionaryObject()).getObject() page2Res = res2.get(resource, DictionaryObject()).getObject()
renameRes = {} renameRes = {}
for key in list(page2Res.keys()): for key in list(page2Res.keys()):
if key in newRes and newRes[key] != page2Res[key]: if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key):
newname = NameObject(key + str(uuid.uuid4())) newname = NameObject(key + str(uuid.uuid4()))
renameRes[key] = newname renameRes[key] = newname
newRes[newname] = page2Res[key] newRes[newname] = page2Res[key]