From a86b652ff46ef08776f27bdcd101ad2d13be4148 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Thu, 28 Dec 2023 19:18:57 +0100 Subject: [PATCH 1/3] MAINT: Return None instead of -1 when page is not attached --- pypdf/_page.py | 8 ++++---- pypdf/_reader.py | 16 ++++++++-------- tests/test_page.py | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index da83b51ce..28160ba07 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1480,21 +1480,21 @@ def compress_content_streams(self, level: int = -1) -> None: raise ValueError("Page must be part of a PdfWriter") @property - def page_number(self) -> int: + def page_number(self) -> Optional[int]: """ Read-only property which return the page number with the pdf file. Returns: - int : page number ; -1 if the page is not attached to a pdf + int : page number ; None if the page is not attached to a pdf """ if self.indirect_reference is None: - return -1 + return None else: try: lst = self.indirect_reference.pdf.pages return lst.index(self) except ValueError: - return -1 + return None def _debug_for_extract(self) -> str: # pragma: no cover out = "" diff --git a/pypdf/_reader.py b/pypdf/_reader.py index fdefffb63..af9f2b04a 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -803,7 +803,7 @@ def threads(self) -> Optional[ArrayObject]: def _get_page_number_by_indirect( self, indirect_reference: Union[None, int, NullObject, IndirectObject] - ) -> int: + ) -> Optional[int]: """ Generate _page_id2num. @@ -811,7 +811,7 @@ def _get_page_number_by_indirect( indirect_reference: Returns: - The page number. + The page number or None """ if self._page_id2num is None: self._page_id2num = { @@ -819,16 +819,16 @@ def _get_page_number_by_indirect( } if indirect_reference is None or isinstance(indirect_reference, NullObject): - return -1 + return None if isinstance(indirect_reference, int): idnum = indirect_reference else: idnum = indirect_reference.idnum assert self._page_id2num is not None, "hint for mypy" - ret = self._page_id2num.get(idnum, -1) + ret = self._page_id2num.get(idnum, None) return ret - def get_page_number(self, page: PageObject) -> int: + def get_page_number(self, page: PageObject) -> Optional[int]: """ Retrieve page number of a given PageObject. @@ -837,11 +837,11 @@ def get_page_number(self, page: PageObject) -> int: an instance of :class:`PageObject` Returns: - The page number or -1 if page is not found + The page number or None if page is not found """ return self._get_page_number_by_indirect(page.indirect_reference) - def get_destination_page_number(self, destination: Destination) -> int: + def get_destination_page_number(self, destination: Destination) -> Optional[int]: """ Retrieve page number of a given Destination object. @@ -849,7 +849,7 @@ def get_destination_page_number(self, destination: Destination) -> int: destination: The destination to get page number. Returns: - The page number or -1 if page is not found + The page number or None if page is not found """ return self._get_page_number_by_indirect(destination.page) diff --git a/tests/test_page.py b/tests/test_page.py index 96e3023b8..84774dbf2 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -1199,12 +1199,12 @@ def test_merge_transformed_page_into_blank(): True, ) blank = PageObject.create_blank_page(width=100, height=100) - assert blank.page_number == -1 + assert blank.page_number is None inserted_blank = writer.add_page(blank) - assert blank.page_number == -1 # the inserted page is a clone + assert blank.page_number is None # the inserted page is a clone assert inserted_blank.page_number == len(writer.pages) - 1 del writer._pages.get_object()["/Kids"][-1] - assert inserted_blank.page_number == -1 + assert inserted_blank.page_number is None def test_pages_printing(): From 9e16b7dd29674bad3f8b0bb39d692c3da63ea684 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Thu, 28 Dec 2023 19:29:32 +0100 Subject: [PATCH 2/3] Fix tests/test_reader.py::test_issue604 --- tests/test_reader.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index afe434306..209b3f844 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -3,6 +3,7 @@ import time from io import BytesIO from pathlib import Path +from typing import Union import pytest @@ -695,12 +696,15 @@ def test_issue604(caplog, strict): ] assert normalize_warnings(caplog.text) == msg - def get_dest_pages(x) -> int: + def get_dest_pages(x) -> Union[int, None]: if isinstance(x, list): r = [get_dest_pages(y) for y in x] return r else: - return pdf.get_destination_page_number(x) + 1 + des_page_number = pdf.get_destination_page_number(x) + if des_page_number is None: + return des_page_number + return des_page_number + 1 out = [] From 83f1ae38cdf02f69271d3db32c4a8d3b1923e8ea Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Jan 2024 22:21:10 +0100 Subject: [PATCH 3/3] Address stefans review - thanks --- tests/test_reader.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index 209b3f844..ab4a8d302 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -3,7 +3,7 @@ import time from io import BytesIO from pathlib import Path -from typing import Union +from typing import List, Union import pytest @@ -37,6 +37,9 @@ SAMPLE_ROOT = PROJECT_ROOT / "sample-files" +NestedList = Union[int, None, List["NestedList"]] + + @pytest.mark.parametrize( ("src", "num_pages"), [("selenium-pypdf-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)], @@ -696,15 +699,14 @@ def test_issue604(caplog, strict): ] assert normalize_warnings(caplog.text) == msg - def get_dest_pages(x) -> Union[int, None]: + def get_dest_pages(x) -> NestedList: if isinstance(x, list): - r = [get_dest_pages(y) for y in x] - return r + return [get_dest_pages(y) for y in x] else: - des_page_number = pdf.get_destination_page_number(x) - if des_page_number is None: - return des_page_number - return des_page_number + 1 + destination_page_number = pdf.get_destination_page_number(x) + if destination_page_number is None: + return destination_page_number + return destination_page_number + 1 out = []