Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Return None instead of -1 when page is not attached #2376

Merged
merged 3 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,21 +1480,21 @@ def compress_content_streams(self, level: int = -1) -> None:
raise ValueError("Page must be part of a PdfWriter")

@property
def page_number(self) -> int:
def page_number(self) -> Optional[int]:
"""
Read-only property which return the page number with the pdf file.

Returns:
int : page number ; -1 if the page is not attached to a pdf
int : page number ; None if the page is not attached to a pdf
"""
if self.indirect_reference is None:
return -1
return None
else:
try:
lst = self.indirect_reference.pdf.pages
return lst.index(self)
except ValueError:
return -1
return None

def _debug_for_extract(self) -> str: # pragma: no cover
out = ""
Expand Down
16 changes: 8 additions & 8 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,32 +803,32 @@ def threads(self) -> Optional[ArrayObject]:

def _get_page_number_by_indirect(
self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> int:
) -> Optional[int]:
"""
Generate _page_id2num.

Args:
indirect_reference:

Returns:
The page number.
The page number or None
"""
if self._page_id2num is None:
self._page_id2num = {
x.indirect_reference.idnum: i for i, x in enumerate(self.pages) # type: ignore
}

if indirect_reference is None or isinstance(indirect_reference, NullObject):
return -1
return None
if isinstance(indirect_reference, int):
idnum = indirect_reference
else:
idnum = indirect_reference.idnum
assert self._page_id2num is not None, "hint for mypy"
ret = self._page_id2num.get(idnum, -1)
ret = self._page_id2num.get(idnum, None)
return ret

def get_page_number(self, page: PageObject) -> int:
def get_page_number(self, page: PageObject) -> Optional[int]:
"""
Retrieve page number of a given PageObject.

Expand All @@ -837,19 +837,19 @@ def get_page_number(self, page: PageObject) -> int:
an instance of :class:`PageObject<pypdf._page.PageObject>`

Returns:
The page number or -1 if page is not found
The page number or None if page is not found
"""
return self._get_page_number_by_indirect(page.indirect_reference)

def get_destination_page_number(self, destination: Destination) -> int:
def get_destination_page_number(self, destination: Destination) -> Optional[int]:
"""
Retrieve page number of a given Destination object.

Args:
destination: The destination to get page number.

Returns:
The page number or -1 if page is not found
The page number or None if page is not found
"""
return self._get_page_number_by_indirect(destination.page)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,12 +1199,12 @@ def test_merge_transformed_page_into_blank():
True,
)
blank = PageObject.create_blank_page(width=100, height=100)
assert blank.page_number == -1
assert blank.page_number is None
inserted_blank = writer.add_page(blank)
assert blank.page_number == -1 # the inserted page is a clone
assert blank.page_number is None # the inserted page is a clone
assert inserted_blank.page_number == len(writer.pages) - 1
del writer._pages.get_object()["/Kids"][-1]
assert inserted_blank.page_number == -1
assert inserted_blank.page_number is None


def test_pages_printing():
Expand Down
14 changes: 10 additions & 4 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
from io import BytesIO
from pathlib import Path
from typing import List, Union

import pytest

Expand Down Expand Up @@ -36,6 +37,9 @@
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"


NestedList = Union[int, None, List["NestedList"]]


@pytest.mark.parametrize(
("src", "num_pages"),
[("selenium-pypdf-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)],
Expand Down Expand Up @@ -695,12 +699,14 @@ def test_issue604(caplog, strict):
]
assert normalize_warnings(caplog.text) == msg

def get_dest_pages(x) -> int:
def get_dest_pages(x) -> NestedList:
if isinstance(x, list):
r = [get_dest_pages(y) for y in x]
return r
return [get_dest_pages(y) for y in x]
else:
return pdf.get_destination_page_number(x) + 1
destination_page_number = pdf.get_destination_page_number(x)
if destination_page_number is None:
return destination_page_number
return destination_page_number + 1

out = []

Expand Down
Loading