openmedialibrary_platform/Shared/large_pdf.patch

46 lines
2.1 KiB
Diff

diff --git a/Shared/lib/python3.7/site-packages/PyPDF2/pdf.py b/Shared/lib/python3.7/site-packages/PyPDF2/pdf.py
index be8959f..5522e4b 100644
--- a/Shared/lib/python3.7/site-packages/PyPDF2/pdf.py
+++ b/Shared/lib/python3.7/site-packages/PyPDF2/pdf.py
@@ -1910,23 +1910,26 @@ class PdfFileReader(object):
if debug: print((" x:", x, "%x"%ord(x)))
if stream.tell() < 2:
raise utils.PdfReadError("EOL marker not found")
- step = min(stream.tell()-2, 8*1024)
- if step < 2:
- raise utils.PdfReadError("EOL marker not found")
- stream.seek(-step, 1)
- x = stream.read(step)
- if b_('\n') in x or b_('\r') in x:
- eol = max(x.rfind(b_('\n')), x.rfind(b_('\r')))
- while eol+1 < len(x) and x[eol+1] in (b_('\n'), b_('\r')):
- eol += 1
- line = x[eol+1:] + line
- while x[eol:eol+1] in (b_('\n'), b_('\r')):
- eol -= 1
- stream.seek(-step+eol, 1)
+ stream.seek(-2, 1)
+ if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
+ crlf = False
+ while x == b_('\n') or x == b_('\r'):
+ if debug:
+ if ord(x) == 0x0D: print(" x is CR 0D")
+ elif ord(x) == 0x0A: print(" x is LF 0A")
+ x = stream.read(1)
+ if x == b_('\n') or x == b_('\r'): # account for CR+LF
+ stream.seek(-1, 1)
+ crlf = True
+ if stream.tell() < 2:
+ raise utils.PdfReadError("EOL marker not found")
+ stream.seek(-2, 1)
+ stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
break
else:
+ if debug: print(" x is neither")
line = x + line
- stream.seek(-step, 1)
+ if debug: print((" RNEL line:", line))
if debug: print("leaving RNEL")
return line