Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: report PdfReadError instead of RecursionError #2800

Merged
merged 3 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,7 +1121,12 @@ def _flatten(
obj = page.get_object()
if obj:
# damaged file may have invalid child in /Pages
self._flatten(obj, inherit, **addt)
try:
self._flatten(obj, inherit, **addt)
except RecursionError:
raise PdfReadError(
"Maximum recursion depth reached during page flattening."
)
elif t == "/Page":
for attr_in, value in list(inherit.items()):
# if the page has its own value, it does not inherit the
Expand Down
5 changes: 4 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,10 @@ def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
def cache_get_indirect_object(
self, generation: int, idnum: int
) -> Optional[PdfObject]:
# Looks up the (generation, idnum) key in self.resolved_objects via .get()
# and returns the result.
# NOTE(review): this is a rendered diff hunk with the +/- markers and the
# indentation stripped. The hunk header reports 4 additions and 1 deletion,
# so the bare `return` directly below appears to be the removed line and the
# try/except that follows the added replacement — confirm against the file.
return self.resolved_objects.get((generation, idnum))
try:
return self.resolved_objects.get((generation, idnum))
except RecursionError:
# A RecursionError raised during the lookup is re-raised as PdfReadError.
raise PdfReadError("Maximum recursion depth reached.")

def cache_indirect_object(
self, generation: int, idnum: int, obj: Optional[PdfObject]
Expand Down
14 changes: 13 additions & 1 deletion tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ def test_iss1943():
docinfo = reader.metadata
docinfo.update(
{
NameObject("/CreationDate"): TextStringObject("D:20230705005151Z00'00'"),
NameObject("/CreationDate"): TextStringObject(
"D:20230705005151Z00'00'"
),
NameObject("/ModDate"): TextStringObject("D:20230705005151Z00'00'"),
}
)
Expand Down Expand Up @@ -1577,3 +1579,13 @@ def test_context_manager_with_stream():
with PdfReader(pdf_stream) as reader:
assert not reader.stream.closed
assert not pdf_stream.closed


# Test added by this change: extracting text from the downloaded sample PDF
# must raise PdfReadError.
# NOTE(review): indentation was lost in this rendering; presumably the final
# extract_text() call sits inside the pytest.raises block — confirm against
# the real test file.
@pytest.mark.enable_socket()
@pytest.mark.timeout(10)
def test_iss2761():
# Sample file named "crash-..." hosted as a GitHub user attachment.
url = "https://github.com/user-attachments/files/16312198/crash-b26d05712a29b241ac6f9dc7fff57428ba2d1a04.pdf"
# Passed to get_data_from_url as `name` — presumably a local cache
# filename; verify against the helper.
name = "iss2761.pdf"
# The reader is built from an in-memory copy of the data, with strict=False.
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=False)
# Text extraction on the first page is expected to fail with PdfReadError.
with pytest.raises(PdfReadError):
reader.pages[0].extract_text()
Loading