Skip to content

Commit

Permalink
Unicode characters in headings are now properly displayed in the tabl…
Browse files Browse the repository at this point in the history
…e of content - fix #320
  • Loading branch information
Lucas-C committed Jan 20, 2022
1 parent b4b3e51 commit 74cd2ec
Show file tree
Hide file tree
Showing 21 changed files with 30 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and [PEP 440](https://www.python.org/dev/peps/pep-0440/).

### Fixed
- `will_page_break()` & `accept_page_break` are not invoked anymore during a call to `multi_cell(split_only=True)`
- Unicode characters in headings are now properly displayed in the table of contents, _cf._ [#320](https://github.com/PyFPDF/fpdf2/issues/320)

## [2.4.6] - 2021-11-16
### Added
Expand Down
8 changes: 1 addition & 7 deletions docs/qpdf-logo.svg
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1585,7 +1585,7 @@ def set_font(self, family=None, style="", size=0):
Standard fonts use `Latin-1` encoding by default, but Windows
encoding `cp1252` (Western Europe) can be used with
[set_doc_option](set_doc_option.md) ("core_fonts_encoding", encoding).
`self.core_fonts_encoding = encoding`.
The font specified is retained from page to page.
The method can be called before the first page is created.
Expand Down
6 changes: 2 additions & 4 deletions fpdf/syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def create_dictionary_string(
return "".join(
[
open_dict,
field_join.join(key_value_join.join(map(str, f)) for f in dict_.items()),
field_join.join(key_value_join.join((k, str(v))) for k, v in dict_.items()),
close_dict,
]
)
Expand Down Expand Up @@ -198,9 +198,7 @@ def camel_case(property_name):

class PDFString(str):
def serialize(self):
# Filtering out characters that are not encodable as Latin1 for now,
# as an outline /Title seemingly cannot "just" be encoded as UTF-16BE:
return f'({self.encode("latin-1", "ignore").decode("latin-1")})'
return f'({self.encode("UTF-16").decode("latin-1")})'


class PDFArray(list):
Expand Down
Binary file modified test/html/html_custom_heading_sizes.pdf
Binary file not shown.
Binary file modified test/html/html_features.pdf
Binary file not shown.
Binary file modified test/html/html_heading_hebrew.pdf
Binary file not shown.
Binary file modified test/html/html_headings_line_height.pdf
Binary file not shown.
Binary file modified test/image/alt_text/alt_text_and_title.pdf
Binary file not shown.
Binary file modified test/image/alt_text/test_alt_text_on_two_pages.pdf
Binary file not shown.
Binary file modified test/link_alt_text.pdf
Binary file not shown.
Binary file modified test/outline/2_pages_outline.pdf
Binary file not shown.
Binary file modified test/outline/custom_HTML2FPDF.pdf
Binary file not shown.
Binary file modified test/outline/html_toc.pdf
Binary file not shown.
Binary file modified test/outline/html_toc_2_pages.pdf
Binary file not shown.
Binary file modified test/outline/html_toc_with_h1_as_2nd_heading.pdf
Binary file not shown.
Binary file added test/outline/russian_heading.pdf
Binary file not shown.
Binary file modified test/outline/simple_outline.pdf
Binary file not shown.
10 changes: 10 additions & 0 deletions test/outline/test_outline.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,13 @@ def test_2_pages_outline(tmp_path):
" sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
)
assert_pdf_equal(pdf, HERE / "2_pages_outline.pdf", tmp_path)


def test_russian_heading(tmp_path): # issue-320
pdf = FPDF()
pdf.add_font("Roboto", style="B", fname="test/fonts/Roboto-Regular.ttf", uni=True)
pdf.set_font("Roboto", style="B")
pdf.add_page()
pdf.start_section("Русский, English, 1 2 3...")
pdf.write(8, "Русский текст в параграфе.")
assert_pdf_equal(pdf, HERE / "russian_heading.pdf", tmp_path)
24 changes: 12 additions & 12 deletions test/outline/test_outline_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_serialize_outline():
)
assert (
serialize_outline(sections, first_object_id=6)
== """\
== f"""\
6 0 obj
<<
/Count 2
Expand All @@ -35,15 +35,15 @@ def test_serialize_outline():
/Last 8 0 R
/Next 9 0 R
/Parent 6 0 R
/Title (Title 1)
/Title ({'Title 1'.encode('UTF-16').decode('latin-1')})
>>
endobj
8 0 obj
<<
/Count 0
/Dest [5 0 R /XYZ 0 0 null]
/Parent 7 0 R
/Title (Subtitle 1.1)
/Title ({'Subtitle 1.1'.encode('UTF-16').decode('latin-1')})
>>
endobj
9 0 obj
Expand All @@ -54,7 +54,7 @@ def test_serialize_outline():
/Last 11 0 R
/Parent 6 0 R
/Prev 7 0 R
/Title (Title 2)
/Title ({'Title 2'.encode('UTF-16').decode('latin-1')})
>>
endobj
10 0 obj
Expand All @@ -63,7 +63,7 @@ def test_serialize_outline():
/Dest [9 0 R /XYZ 0 0 null]
/Next 11 0 R
/Parent 9 0 R
/Title (Subtitle 2.1)
/Title ({'Subtitle 2.1'.encode('UTF-16').decode('latin-1')})
>>
endobj
11 0 obj
Expand All @@ -72,7 +72,7 @@ def test_serialize_outline():
/Dest [11 0 R /XYZ 0 0 null]
/Parent 9 0 R
/Prev 10 0 R
/Title (Subtitle 2.2)
/Title ({'Subtitle 2.2'.encode('UTF-16').decode('latin-1')})
>>
endobj"""
)
Expand All @@ -88,7 +88,7 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
)
assert (
serialize_outline(sections, first_object_id=6)
== """\
== f"""\
6 0 obj
<<
/Count 2
Expand All @@ -104,15 +104,15 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
/First 8 0 R
/Last 8 0 R
/Parent 6 0 R
/Title (?-1)
/Title ({'?-1'.encode('UTF-16').decode('latin-1')})
>>
endobj
8 0 obj
<<
/Count 0
/Dest [5 0 R /XYZ 0 0 null]
/Parent 7 0 R
/Title (?-1-1)
/Title ({'?-1-1'.encode('UTF-16').decode('latin-1')})
>>
endobj
9 0 obj
Expand All @@ -122,7 +122,7 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
/First 10 0 R
/Last 10 0 R
/Parent 6 0 R
/Title (1)
/Title ({'1'.encode('UTF-16').decode('latin-1')})
>>
endobj
10 0 obj
Expand All @@ -132,15 +132,15 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
/First 11 0 R
/Last 11 0 R
/Parent 9 0 R
/Title (1-1)
/Title ({'1-1'.encode('UTF-16').decode('latin-1')})
>>
endobj
11 0 obj
<<
/Count 0
/Dest [5 0 R /XYZ 0 0 null]
/Parent 10 0 R
/Title (1-1-1)
/Title ({'1-1-1'.encode('UTF-16').decode('latin-1')})
>>
endobj"""
)
6 changes: 3 additions & 3 deletions test/test_structure_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_single_image_structure_tree():
)
assert (
struct_builder.serialize(first_object_id=3)
== """\
== f"""\
3 0 obj
<<
/K [4 0 R]
Expand All @@ -106,12 +106,12 @@ def test_single_image_structure_tree():
endobj
6 0 obj
<<
/Alt (Image description)
/Alt ({'Image description'.encode('UTF-16').decode('latin-1')})
/K [0]
/P 4 0 R
/Pg 1 0 R
/S /Figure
/T (Image title)
/T ({'Image title'.encode('UTF-16').decode('latin-1')})
/Type /StructElem
>>
endobj"""
Expand Down

0 comments on commit 74cd2ec

Please sign in to comment.