avoid hang in large pdfs
This commit is contained in:
parent
66205d529e
commit
8bcf0cfe43
1 changed file with 29 additions and 19 deletions
|
@@ -46,6 +46,7 @@ import string
|
||||||
import math
|
import math
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
|
import uuid
|
||||||
from sys import version_info
|
from sys import version_info
|
||||||
if version_info < ( 3, 0 ):
|
if version_info < ( 3, 0 ):
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
@@ -225,8 +226,22 @@ class PdfFileWriter(object):
|
||||||
NameObject("/S"): NameObject("/JavaScript"),
|
NameObject("/S"): NameObject("/JavaScript"),
|
||||||
NameObject("/JS"): NameObject("(%s)" % javascript)
|
NameObject("/JS"): NameObject("(%s)" % javascript)
|
||||||
})
|
})
|
||||||
|
js_indirect_object = self._addObject(js)
|
||||||
|
|
||||||
|
# We need a name for parameterized javascript in the pdf file, but it can be anything.
|
||||||
|
js_string_name = str(uuid.uuid4())
|
||||||
|
|
||||||
|
js_name_tree = DictionaryObject()
|
||||||
|
js_name_tree.update({
|
||||||
|
NameObject("/JavaScript"): DictionaryObject({
|
||||||
|
NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
self._addObject(js_name_tree)
|
||||||
|
|
||||||
self._root_object.update({
|
self._root_object.update({
|
||||||
NameObject("/OpenAction"): self._addObject(js)
|
NameObject("/OpenAction"): js_indirect_object,
|
||||||
|
NameObject("/Names"): js_name_tree
|
||||||
})
|
})
|
||||||
|
|
||||||
def addAttachment(self, fname, fdata):
|
def addAttachment(self, fname, fdata):
|
||||||
|
@@ -1910,26 +1925,21 @@ class PdfFileReader(object):
|
||||||
if debug: print((" x:", x, "%x"%ord(x)))
|
if debug: print((" x:", x, "%x"%ord(x)))
|
||||||
if stream.tell() < 2:
|
if stream.tell() < 2:
|
||||||
raise utils.PdfReadError("EOL marker not found")
|
raise utils.PdfReadError("EOL marker not found")
|
||||||
stream.seek(-2, 1)
|
step = min(stream.tell()-2, 8*1024)
|
||||||
if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
|
stream.seek(-step, 1)
|
||||||
crlf = False
|
x = stream.read(step)
|
||||||
while x == b_('\n') or x == b_('\r'):
|
if b_('\n') in x or b_('\r') in x:
|
||||||
if debug:
|
eol = max(x.rfind(b_('\n')), x.rfind(b_('\r')))
|
||||||
if ord(x) == 0x0D: print(" x is CR 0D")
|
while eol+1 < len(x) and x[eol+1] in (b_('\n'), b_('\r')):
|
||||||
elif ord(x) == 0x0A: print(" x is LF 0A")
|
eol += 1
|
||||||
x = stream.read(1)
|
line = x[eol+1:] + line
|
||||||
if x == b_('\n') or x == b_('\r'): # account for CR+LF
|
while x[eol] in (b_('\n'), b_('\r')):
|
||||||
stream.seek(-1, 1)
|
eol -= 1
|
||||||
crlf = True
|
stream.seek(-step+eol-2, 1)
|
||||||
if stream.tell() < 2:
|
|
||||||
raise utils.PdfReadError("EOL marker not found")
|
|
||||||
stream.seek(-2, 1)
|
|
||||||
stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
if debug: print(" x is neither")
|
|
||||||
line = x + line
|
line = x + line
|
||||||
if debug: print((" RNEL line:", line))
|
stream.seek(-step-2, 1)
|
||||||
if debug: print("leaving RNEL")
|
if debug: print("leaving RNEL")
|
||||||
return line
|
return line
|
||||||
|
|
||||||
|
@@ -2142,7 +2152,7 @@ class PageObject(DictionaryObject):
|
||||||
page2Res = res2.get(resource, DictionaryObject()).getObject()
|
page2Res = res2.get(resource, DictionaryObject()).getObject()
|
||||||
renameRes = {}
|
renameRes = {}
|
||||||
for key in list(page2Res.keys()):
|
for key in list(page2Res.keys()):
|
||||||
if key in newRes and newRes[key] != page2Res[key]:
|
if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key):
|
||||||
newname = NameObject(key + str(uuid.uuid4()))
|
newname = NameObject(key + str(uuid.uuid4()))
|
||||||
renameRes[key] = newname
|
renameRes[key] = newname
|
||||||
newRes[newname] = page2Res[key]
|
newRes[newname] = page2Res[key]
|
||||||
|
|
Loading…
Reference in a new issue