py-pdf · stefan6419846 · Sep 14, 2024 · Aug 23, 2024 · Aug 24, 2024 · Aug 25, 2024
diff --git a/docs/user/metadata.md b/docs/user/metadata.md
@@ -76,6 +76,30 @@ writer.add_metadata(
     }
 )
 
+# Clear all metadata entries but keep the (now empty) /Info entry in the PDF
+writer.metadata = {}
+
+# Replace all entries with a new set of entries
+writer.metadata = {
+    "/Author": "Martin",
+    "/Producer": "Libre Writer",
+}
+
+# Save the new PDF to a file
+with open("meta-pdf.pdf", "wb") as f:
+    writer.write(f)
+```
+
+## Removing metadata entry
+
+```python
+from pypdf import PdfWriter
+
+writer = PdfWriter("example.pdf")
+
+# Remove the metadata (/Info entry)
+writer.metadata = None
+
 # Save the new PDF to a file
 with open("meta-pdf.pdf", "wb") as f:
     writer.write(f)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -53,7 +53,7 @@
 )
 
 from ._cmap import _default_fonts_space_width, build_char_map_from_dict
-from ._doc_common import PdfDocCommon
+from ._doc_common import DocumentInformation, PdfDocCommon
 from ._encryption import EncryptAlgorithm, Encryption
 from ._page import PageObject
 from ._page_labels import nums_clear_range, nums_insert, nums_next
@@ -194,7 +194,7 @@ def __init__(
         """
 
         self._ID: Union[ArrayObject, None] = None
-        self._info_obj: PdfObject
+        self._info_obj: Optional[PdfObject]
 
         if self.incremental:
             if isinstance(fileobj, (str, Path)):
@@ -309,13 +309,26 @@ def _info(self) -> Optional[DictionaryObject]:
         Returns:
             /Info Dictionary; None if the entry does not exist
         """
-        return cast(DictionaryObject, self._info_obj.get_object())
+        return (
+            None
+            if self._info_obj is None
+            else cast(DictionaryObject, self._info_obj.get_object())
+        )
 
     @_info.setter
-    def _info(self, value: Union[IndirectObject, DictionaryObject]) -> None:
-        obj = cast(DictionaryObject, self._info_obj.get_object())
-        obj.clear()
-        obj.update(cast(DictionaryObject, value.get_object()))
+    def _info(self, value: Optional[Union[IndirectObject, DictionaryObject]]) -> None:
+        if value is None:
+            try:
+                self._objects[self._info_obj.indirect_reference.idnum - 1] = None  # type: ignore
+            except (KeyError, AttributeError):
+                pass
+            self._info_obj = None
+        else:
+            if self._info_obj is None:
+                self._info_obj = self._add_object(DictionaryObject())
+            obj = cast(DictionaryObject, self._info_obj.get_object())
+            obj.clear()
+            obj.update(cast(DictionaryObject, value.get_object()))
 
     @property
     def xmp_metadata(self) -> Optional[XmpInformation]:
@@ -1186,6 +1199,7 @@ def clone_reader_document_root(self, reader: PdfReader) -> None:
             self._objects = [None] * cast(int, reader.trailer["/Size"])
         else:
             self._objects.clear()
+        self._info_obj = None
         self._root_object = reader.root_object.clone(self)
         self._pages = self._root_object.raw_get("/Pages")
 
@@ -1226,22 +1240,21 @@ def clone_document_from_reader(
                 document.
         """
         self.clone_reader_document_root(reader)
-        if TK.INFO in reader.trailer:
-            inf = reader._info
-            if self.incremental:
-                if inf is not None:
-                    self._info_obj = cast(
-                        IndirectObject, inf.clone(self).indirect_reference
-                    )
-                self._original_hash[
-                    cast(IndirectObject, self._info_obj.indirect_reference).idnum - 1
-                ] = cast(DictionaryObject, self._info_obj.get_object()).hash_bin()
-            elif inf is not None:
-                self._info_obj = self._add_object(
-                    DictionaryObject(cast(DictionaryObject, inf.get_object()))
+        inf = reader._info
+        if self.incremental:
+            if inf is not None:
+                self._info_obj = cast(
+                    IndirectObject, inf.clone(self).indirect_reference
                 )
-        else:
-            self._info_obj = self._add_object(DictionaryObject())
+                assert isinstance(self._info, DictionaryObject), "for mypy"
+                self._original_hash[
+                    self._info_obj.indirect_reference.idnum - 1
+                ] = self._info.hash_bin()
+        elif inf is not None:
+            self._info_obj = self._add_object(
+                DictionaryObject(cast(DictionaryObject, inf.get_object()))
+            )
+        # else: _info_obj was already reset to None in clone_reader_document_root()
 
         try:
             self._ID = cast(ArrayObject, reader._ID).clone(self)
@@ -1547,6 +1560,34 @@ def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
         trailer.write_to_stream(stream)
         stream.write(f"\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof
 
+    @property
+    def metadata(self) -> Optional[DocumentInformation]:
+        """
+        Retrieve/set the PDF file's document information dictionary, if it exists.
+
+        Args:
+            value: Entries replacing the existing ones. If None, remove the /Info entry from the PDF.
+
+        Note that some PDF files use (XMP) metadata streams instead of document
+        information dictionaries, and these metadata streams are not
+        accessed by this property.
+        """
+        return super().metadata
+
+    @metadata.setter
+    def metadata(
+        self,
+        value: Optional[Union[DocumentInformation, DictionaryObject, Dict[Any, Any]]],
+    ) -> None:
+        if value is None:
+            self._info = None
+        else:
+            if self._info is not None:
+                self._info.clear()
+            else:
+                self._info = DictionaryObject()
+            self.add_metadata(value)
+
     def add_metadata(self, infos: Dict[str, Any]) -> None:
         """
         Add custom metadata to the output.

diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -1795,9 +1795,33 @@ def test_missing_info():
 
     writer = PdfWriter(clone_from=reader)
     assert len(writer.pages) == len(reader.pages)
+    assert writer.metadata is None
+    b = BytesIO()
+    writer.write(b)
+    assert b"/Info" not in b.getvalue()
+
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
-    writer._info = reader._info
+    writer.metadata = reader.metadata
     assert dict(writer._info) == dict(reader._info)
+    assert writer.metadata == reader.metadata
+    b = BytesIO()
+    writer.write(b)
+    assert b"/Info" in b.getvalue()
+
+    writer.metadata = {}
+    writer._info = DictionaryObject()  # for code coverage
+    b = BytesIO()
+    writer.write(b)
+    assert b"/Info" in b.getvalue()
+    assert writer.metadata == {}
+
+    writer.metadata = None
+    writer.metadata = None  # for code coverage
+    assert writer.metadata is None
+    assert PdfWriter().metadata == {"/Producer": "pypdf"}
+    b = BytesIO()
+    writer.write(b)
+    assert b"/Info" not in b.getvalue()
 
 
 @pytest.mark.enable_socket()
@@ -2417,6 +2441,8 @@ def test_increment_writer(caplog):
     writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=True)
     # 1 object is modified: page 0  inherits MediaBox so is changed
     assert len(writer.list_objects_in_increment()) == 1
+    b = BytesIO()
+    writer.write(b)
 
     writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=False)
     # 1 object is modified: page 0  inherits MediaBox so is changed
@@ -2438,7 +2464,13 @@ def test_increment_writer(caplog):
 
     # clone without info
     writer = PdfWriter(RESOURCE_ROOT / "missing_info.pdf", incremental=True)
+    assert len(writer.list_objects_in_increment()) == 0
+    assert writer.metadata is None
+    writer.metadata = {}
+    assert writer.metadata == {}
     assert len(writer.list_objects_in_increment()) == 1
-    assert writer._info == {}
+    writer.metadata = None
+    assert len(writer.list_objects_in_increment()) == 0
+    assert writer.metadata is None
     b = BytesIO()
     writer.write(b)