From 8bcf0cfe436aadb0b765ac2705fc50fdab7337f7 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Mon, 8 Feb 2016 11:51:34 +0530
Subject: [PATCH] avoid hang in large pdfs

---
 .../lib/python3.4/site-packages/PyPDF2/pdf.py | 48 +++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py
index 5522e4b..d3d4947 100644
--- a/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py
+++ b/Shared/lib/python3.4/site-packages/PyPDF2/pdf.py
@@ -46,6 +46,7 @@ import string
 import math
 import struct
 import sys
+import uuid
 from sys import version_info
 if version_info < ( 3, 0 ):
     from cStringIO import StringIO
@@ -225,8 +226,22 @@ class PdfFileWriter(object):
                 NameObject("/S"): NameObject("/JavaScript"),
                 NameObject("/JS"): NameObject("(%s)" % javascript)
                 })
+        js_indirect_object = self._addObject(js)
+
+        # The /Names array under /JavaScript pairs a key string with the script object; the key can be anything, so use a random UUID.
+        js_string_name = str(uuid.uuid4())
+
+        js_name_tree = DictionaryObject()
+        js_name_tree.update({
+                NameObject("/JavaScript"): DictionaryObject({
+                  NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object])
+                })
+              })
+        self._addObject(js_name_tree)
+
         self._root_object.update({
-                NameObject("/OpenAction"): self._addObject(js)
+                NameObject("/OpenAction"): js_indirect_object,
+                NameObject("/Names"): js_name_tree
                 })
 
     def addAttachment(self, fname, fdata):
@@ -1910,26 +1925,21 @@ class PdfFileReader(object):
             if debug: print(("  x:", x, "%x"%ord(x)))
             if stream.tell() < 2:
                 raise utils.PdfReadError("EOL marker not found")
-            stream.seek(-2, 1)
-            if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
-                crlf = False
-                while x == b_('\n') or x == b_('\r'):
-                    if debug:
-                        if ord(x) == 0x0D: print("  x is CR 0D")
-                        elif ord(x) == 0x0A: print("  x is LF 0A")
-                    x = stream.read(1)
-                    if x == b_('\n') or x == b_('\r'): # account for CR+LF
-                        stream.seek(-1, 1)
-                        crlf = True
-                    if stream.tell() < 2:
-                        raise utils.PdfReadError("EOL marker not found")
-                    stream.seek(-2, 1)
-                stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
+            step = min(stream.tell()-2, 8*1024)
+            stream.seek(-step, 1)
+            x = stream.read(step)
+            if b_('\n') in x or b_('\r') in x:
+                eol = max(x.rfind(b_('\n')), x.rfind(b_('\r')))
+                while eol+1 < len(x) and x[eol+1] in (b_('\n'), b_('\r')):
+                    eol += 1
+                line = x[eol+1:] + line
+                while x[eol] in (b_('\n'), b_('\r')):
+                    eol -= 1
+                stream.seek(-step+eol-2, 1)
                 break
             else:
-                if debug: print("  x is neither")
                 line = x + line
-                if debug: print(("  RNEL line:", line))
+                stream.seek(-step-2, 1)
         if debug: print("leaving RNEL")
         return line
 
@@ -2142,7 +2152,7 @@ class PageObject(DictionaryObject):
         page2Res = res2.get(resource, DictionaryObject()).getObject()
         renameRes = {}
         for key in list(page2Res.keys()):
-            if key in newRes and newRes[key] != page2Res[key]:
+            if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key):
                 newname = NameObject(key + str(uuid.uuid4()))
                 renameRes[key] = newname
                 newRes[newname] = page2Res[key]