def _get_clone_from(
    fileobj: Union[None, "PdfReader", str, Path, IO[Any], BytesIO],
    clone_from: Union[None, "PdfReader", str, Path, IO[Any], BytesIO],
) -> Union[None, "PdfReader", str, Path, IO[Any], BytesIO]:
    """
    Decide which object, if any, the writer should be cloned from.

    This lets the first positional argument of ``PdfWriter`` act as the
    clone source when it designates existing, non-empty PDF data, while
    still allowing it to be a (new or empty) output target.

    Args:
        fileobj: First argument given to the writer. ``None`` and ``""``
            mean "nothing provided".
        clone_from: Explicit clone source given to the writer.

    Returns:
        * ``clone_from`` unchanged when ``fileobj`` is ``None`` or ``""``,
          or when ``fileobj`` is a path/stream and ``clone_from`` was given;
        * ``fileobj`` when it is neither a path nor a stream (for instance
          a ``PdfReader``), even if ``clone_from`` was also given;
        * ``fileobj`` when it is a path to an existing non-empty file, or a
          readable, seekable, non-empty stream, and ``clone_from`` is
          ``None``;
        * ``None`` otherwise (``fileobj`` is then only an output target).
    """
    # ``typing.IO`` must not be used with isinstance(): objects returned by
    # open() are not instances of it. ``io.IOBase`` is the runtime base class
    # of every standard stream, ``BytesIO`` included.
    from io import IOBase

    if fileobj is None:
        # Nothing was passed positionally: never discard an explicit source.
        return clone_from

    if not isinstance(fileobj, (str, Path, IOBase)):
        # Not a path and not a standard stream (e.g. a PdfReader): the
        # object itself is the source.
        return fileobj

    if clone_from is not None or fileobj == "":
        # An explicit source wins over a path/stream, which is then only
        # an output target; "" is the "no file" default.
        return clone_from

    if isinstance(fileobj, (str, Path)):
        try:
            # One stat() call instead of exists() + stat(): no race window.
            has_content = Path(fileobj).stat().st_size > 0
        except (OSError, ValueError):
            # Missing/inaccessible file, or a path the OS cannot represent.
            has_content = False
    else:
        try:
            if fileobj.readable() and fileobj.seekable():
                position = fileobj.tell()
                try:
                    # Seeking to the end returns the size; unlike
                    # seek(-1, 2) it is valid on an empty stream and does
                    # not mistake a 1-byte stream for an empty one.
                    has_content = fileobj.seek(0, 2) > 0
                finally:
                    # Always hand the stream back where the caller left it.
                    fileobj.seek(position, 0)
            else:
                # Write-only or non-seekable streams cannot be read back.
                has_content = False
        except (OSError, ValueError):
            # Closed stream or failing seek: treat as "nothing to clone".
            has_content = False

    return fileobj if has_content else None


# Method of ``PdfWriter``.
def __enter__(self) -> "PdfWriter":
    """
    Store that writer is initialized by 'with'.

    The object originally received as ``fileobj`` (kept in
    ``temp_fileobj``) is saved, ``__init__`` is run again with its default
    arguments, and the saved object is then installed as ``fileobj``.

    Returns:
        The writer itself.
    """
    t = self.temp_fileobj
    # Re-run the initializer with defaults; ``t`` was saved first because
    # this call reassigns ``temp_fileobj``.
    self.__init__()  # type: ignore
    self.with_as_usage = True
    self.fileobj = t  # type: ignore
    return self
def test_init_without_named_arg():
    """Passing the source positionally must clone it just like ``clone_from``."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)

    # Reference object count, obtained through the explicit keyword.
    expected = len(PdfWriter(clone_from=reader)._objects)

    # A PdfReader given positionally.
    assert len(PdfWriter(reader)._objects) == expected

    # An opened binary file; its bytes are also kept for the BytesIO case.
    with open(pdf_path, "rb") as stream:
        from_stream = PdfWriter(stream)
        stream.seek(0, 0)
        buffer = BytesIO(stream.read())
    assert len(from_stream._objects) == expected

    # A Path, its string form, and an in-memory stream.
    for source in (pdf_path, str(pdf_path), buffer):
        assert len(PdfWriter(source)._objects) == expected