Unicode characters in headings are now properly displayed in the table of content - fix #320
py-pdf · Jan 20, 2022 · 74cd2ec · 74cd2ec
1 parent b4b3e51
commit 74cd2ec
Show file tree

Hide file tree

Showing 21 changed files with 30 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ and [PEP 440](https://www.python.org/dev/peps/pep-0440/).
 
 ### Fixed
 - `will_page_break()` & `accept_page_break` are not invoked anymore during a call to `multi_cell(split_only=True)`
+- Unicode characters in headings are now properly displayed in the table of content, _cf._ [#320](https://github.com/PyFPDF/fpdf2/issues/320)
 
 ## [2.4.6] - 2021-11-16
 ### Added

diff --git a/docs/qpdf-logo.svg b/docs/qpdf-logo.svg
diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
@@ -1585,7 +1585,7 @@ def set_font(self, family=None, style="", size=0):
 
         Standard fonts use `Latin-1` encoding by default, but Windows
         encoding `cp1252` (Western Europe) can be used with
-        [set_doc_option](set_doc_option.md) ("core_fonts_encoding", encoding).
+        `self.core_fonts_encoding = encoding`.
 
         The font specified is retained from page to page.
         The method can be called before the first page is created.

diff --git a/fpdf/syntax.py b/fpdf/syntax.py
@@ -96,7 +96,7 @@ def create_dictionary_string(
     return "".join(
         [
             open_dict,
-            field_join.join(key_value_join.join(map(str, f)) for f in dict_.items()),
+            field_join.join(key_value_join.join((k, str(v))) for k, v in dict_.items()),
             close_dict,
         ]
     )
@@ -198,9 +198,7 @@ def camel_case(property_name):
 
 class PDFString(str):
     def serialize(self):
-        # Filtering out characters that are not encodable as Latin1 for now,
-        # as an outline /Title seemingly cannot "just" be encoded as UTF-16BE:
-        return f'({self.encode("latin-1", "ignore").decode("latin-1")})'
+        return f'({self.encode("UTF-16").decode("latin-1")})'
 
 
 class PDFArray(list):

diff --git a/test/html/html_custom_heading_sizes.pdf b/test/html/html_custom_heading_sizes.pdf
diff --git a/test/html/html_features.pdf b/test/html/html_features.pdf
diff --git a/test/html/html_heading_hebrew.pdf b/test/html/html_heading_hebrew.pdf
diff --git a/test/html/html_headings_line_height.pdf b/test/html/html_headings_line_height.pdf
diff --git a/test/image/alt_text/alt_text_and_title.pdf b/test/image/alt_text/alt_text_and_title.pdf
diff --git a/test/image/alt_text/test_alt_text_on_two_pages.pdf b/test/image/alt_text/test_alt_text_on_two_pages.pdf
diff --git a/test/link_alt_text.pdf b/test/link_alt_text.pdf
diff --git a/test/outline/2_pages_outline.pdf b/test/outline/2_pages_outline.pdf
diff --git a/test/outline/custom_HTML2FPDF.pdf b/test/outline/custom_HTML2FPDF.pdf
diff --git a/test/outline/html_toc.pdf b/test/outline/html_toc.pdf
diff --git a/test/outline/html_toc_2_pages.pdf b/test/outline/html_toc_2_pages.pdf
diff --git a/test/outline/html_toc_with_h1_as_2nd_heading.pdf b/test/outline/html_toc_with_h1_as_2nd_heading.pdf
diff --git a/test/outline/russian_heading.pdf b/test/outline/russian_heading.pdf
diff --git a/test/outline/simple_outline.pdf b/test/outline/simple_outline.pdf
diff --git a/test/outline/test_outline.py b/test/outline/test_outline.py
@@ -157,3 +157,13 @@ def test_2_pages_outline(tmp_path):
             " sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
         )
     assert_pdf_equal(pdf, HERE / "2_pages_outline.pdf", tmp_path)
+
+
+def test_russian_heading(tmp_path):  # issue-320
+    pdf = FPDF()
+    pdf.add_font("Roboto", style="B", fname="test/fonts/Roboto-Regular.ttf", uni=True)
+    pdf.set_font("Roboto", style="B")
+    pdf.add_page()
+    pdf.start_section("Русский, English, 1 2 3...")
+    pdf.write(8, "Русский текст в параграфе.")
+    assert_pdf_equal(pdf, HERE / "russian_heading.pdf", tmp_path)
diff --git a/test/outline/test_outline_serializer.py b/test/outline/test_outline_serializer.py
@@ -18,7 +18,7 @@ def test_serialize_outline():
     )
     assert (
         serialize_outline(sections, first_object_id=6)
-        == """\
+        == f"""\
 6 0 obj
 <<
 /Count 2
@@ -35,15 +35,15 @@ def test_serialize_outline():
 /Last 8 0 R
 /Next 9 0 R
 /Parent 6 0 R
-/Title (Title 1)
+/Title ({'Title 1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 8 0 obj
 <<
 /Count 0
 /Dest [5 0 R /XYZ 0 0 null]
 /Parent 7 0 R
-/Title (Subtitle 1.1)
+/Title ({'Subtitle 1.1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 9 0 obj
@@ -54,7 +54,7 @@ def test_serialize_outline():
 /Last 11 0 R
 /Parent 6 0 R
 /Prev 7 0 R
-/Title (Title 2)
+/Title ({'Title 2'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 10 0 obj
@@ -63,7 +63,7 @@ def test_serialize_outline():
 /Dest [9 0 R /XYZ 0 0 null]
 /Next 11 0 R
 /Parent 9 0 R
-/Title (Subtitle 2.1)
+/Title ({'Subtitle 2.1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 11 0 obj
@@ -72,7 +72,7 @@ def test_serialize_outline():
 /Dest [11 0 R /XYZ 0 0 null]
 /Parent 9 0 R
 /Prev 10 0 R
-/Title (Subtitle 2.2)
+/Title ({'Subtitle 2.2'.encode('UTF-16').decode('latin-1')})
 >>
 endobj"""
     )
@@ -88,7 +88,7 @@ def test_serialize_outline_with_headless_hierarchy():  # issues 239
     )
     assert (
         serialize_outline(sections, first_object_id=6)
-        == """\
+        == f"""\
 6 0 obj
 <<
 /Count 2
@@ -104,15 +104,15 @@ def test_serialize_outline_with_headless_hierarchy():  # issues 239
 /First 8 0 R
 /Last 8 0 R
 /Parent 6 0 R
-/Title (?-1)
+/Title ({'?-1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 8 0 obj
 <<
 /Count 0
 /Dest [5 0 R /XYZ 0 0 null]
 /Parent 7 0 R
-/Title (?-1-1)
+/Title ({'?-1-1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 9 0 obj
@@ -122,7 +122,7 @@ def test_serialize_outline_with_headless_hierarchy():  # issues 239
 /First 10 0 R
 /Last 10 0 R
 /Parent 6 0 R
-/Title (1)
+/Title ({'1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 10 0 obj
@@ -132,15 +132,15 @@ def test_serialize_outline_with_headless_hierarchy():  # issues 239
 /First 11 0 R
 /Last 11 0 R
 /Parent 9 0 R
-/Title (1-1)
+/Title ({'1-1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj
 11 0 obj
 <<
 /Count 0
 /Dest [5 0 R /XYZ 0 0 null]
 /Parent 10 0 R
-/Title (1-1-1)
+/Title ({'1-1-1'.encode('UTF-16').decode('latin-1')})
 >>
 endobj"""
     )
diff --git a/test/test_structure_tree.py b/test/test_structure_tree.py
@@ -83,7 +83,7 @@ def test_single_image_structure_tree():
     )
     assert (
         struct_builder.serialize(first_object_id=3)
-        == """\
+        == f"""\
 3 0 obj
 <<
 /K [4 0 R]
@@ -106,12 +106,12 @@ def test_single_image_structure_tree():
 endobj
 6 0 obj
 <<
-/Alt (Image description)
+/Alt ({'Image description'.encode('UTF-16').decode('latin-1')})
 /K [0]
 /P 4 0 R
 /Pg 1 0 R
 /S /Figure
-/T (Image title)
+/T ({'Image title'.encode('UTF-16').decode('latin-1')})
 /Type /StructElem
 >>
 endobj"""