From 790093a4f9a24edad590fc8873a4657a0407db15 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Wed, 28 Feb 2024 19:13:36 +0100 Subject: [PATCH] Support for `start` & `type` attributes of <ol>
    tags when using FPDF.write_html() --- CHANGELOG.md | 3 +- fpdf/fpdf.py | 5 +- fpdf/html.py | 43 +++++++++++---- fpdf/util.py | 27 ++++++++++ ...let_color.pdf => html_li_prefix_color.pdf} | Bin test/html/html_ol_start_and_type.pdf | Bin 0 -> 1019 bytes test/html/test_html.py | 51 +++++++++++------- 7 files changed, 98 insertions(+), 31 deletions(-) rename test/html/{html_ul_bullet_color.pdf => html_li_prefix_color.pdf} (100%) create mode 100644 test/html/html_ol_start_and_type.pdf diff --git a/CHANGELOG.md b/CHANGELOG.md index 429a8d425..5438ab6f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default', ## [2.7.9] - Not released yet ### Added * support for overriding paragraph direction on bidirectional text -* new optional `ul_bullet_color` parameter for `FPDF.write_html()` +* new optional `li_prefix_color` parameter for `FPDF.write_html()` +* support for `start` & `type` attributes of `<ol>` tags when using `FPDF.write_html()` * [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html) now accepts a `tag_styles` parameter to control the font, color & size of HTML elements: `<a>`, `<blockquote>`, `<li>`... * [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html) now accepts a `tag_indents` parameter to control, for example, the indent of `<blockquote>` elements ### Changed diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 98711934a..ec75d94d5 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -405,8 +405,9 @@ def write_html(self, text, *args, **kwargs): li_tag_indent (int): [**DEPRECATED since v2.7.8**] numeric indentation of
<li> elements - Set tag_indents instead dd_tag_indent (int): [**DEPRECATED since v2.7.8**] numeric indentation of <dd> elements - Set tag_indents instead table_line_separators (bool): enable horizontal line separators in <table> - ul_bullet_char (str): bullet character for <ul> elements - ul_bullet_color (tuple | str | drawing.Device* instance): color of the <ul> bullets + ul_bullet_char (str): bullet character preceding <li> items in <ul> lists. + li_prefix_color (tuple | str | drawing.Device* instance): color for bullets or numbers preceding <li> tags. + This applies to both <ul> & <ol> lists. heading_sizes (dict): [**DEPRECATED since v2.7.8**] font size per heading level names ("h1", "h2"...) - Set tag_styles instead pre_code_font (str): [**DEPRECATED since v2.7.8**] font to use for <pre> & <code> blocks - Set tag_styles instead
                             warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
                diff --git a/fpdf/html.py b/fpdf/html.py
                index be17c635d..082e7c2e1 100644
                --- a/fpdf/html.py
                +++ b/fpdf/html.py
                @@ -7,6 +7,7 @@
                 """
                 
                 from html.parser import HTMLParser
                +from string import ascii_lowercase, ascii_uppercase
                 import logging, re, warnings
                 
                 from .deprecation import get_stack_level
                @@ -15,6 +16,7 @@
                 from .errors import FPDFException
                 from .fonts import FontFace
                 from .table import Table
                +from .util import int2roman
                 
                 LOGGER = logging.getLogger(__name__)
                 BULLET_WIN1252 = "\x95"  # BULLET character in Windows-1252 encoding
                @@ -249,7 +251,7 @@ def __init__(
                         dd_tag_indent=10,
                         table_line_separators=False,
                         ul_bullet_char=BULLET_WIN1252,
                -        ul_bullet_color=(190, 0, 0),
                +        li_prefix_color=(190, 0, 0),
                         heading_sizes=None,
                         pre_code_font=DEFAULT_TAG_STYLES["pre"].family,
                         warn_on_tags_not_matching=True,
                @@ -265,8 +267,9 @@ def __init__(
                             li_tag_indent (int): [**DEPRECATED since v2.7.9**] numeric indentation of <li> elements - Set tag_indents instead
                             dd_tag_indent (int): [**DEPRECATED since v2.7.9**] numeric indentation of <dd> elements - Set tag_indents instead
                             table_line_separators (bool): enable horizontal line separators in <table>
                -            ul_bullet_char (str): bullet character for <ul> elements
                -            ul_bullet_color (tuple | str | drawing.Device* instance): color of the <ul> bullets
                +            ul_bullet_char (str): bullet character preceding <li> items in <ul> lists.
                +            li_prefix_color (tuple | str | drawing.Device* instance): color for bullets or numbers preceding <li> tags.
                +                This applies to both <ul> & <ol> lists.
                             heading_sizes (dict): [**DEPRECATED since v2.7.9**] font size per heading level names ("h1", "h2"...) - Set tag_styles instead
                             pre_code_font (str): [**DEPRECATED since v2.7.9**] font to use for <pre> & <code> blocks - Set tag_styles instead
                             warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
                @@ -277,10 +280,10 @@ def __init__(
                         self.pdf = pdf
                         self.image_map = image_map or (lambda src: src)
                         self.ul_bullet_char = ul_bullet_char
                -        self.ul_bullet_color = (
                -            color_as_decimal(ul_bullet_color)
                -            if isinstance(ul_bullet_color, str)
                -            else convert_to_device_color(ul_bullet_color).colors255
                +        self.li_prefix_color = (
                +            color_as_decimal(li_prefix_color)
                +            if isinstance(li_prefix_color, str)
                +            else convert_to_device_color(li_prefix_color).colors255
                         )
                         self.warn_on_tags_not_matching = warn_on_tags_not_matching
                 
                @@ -308,6 +311,7 @@ def __init__(
                         self.align = ""
                         self.style_stack = []  # list of FontFace
                         self.indent = 0
                +        self.ol_type = []  # when inside a <ol>
                  tag, can be "a", "A", "i", "I" or "1" self.bullet = [] self.font_color = pdf.text_color.colors255 self.heading_level = None @@ -640,11 +644,13 @@ def handle_starttag(self, tag, attrs): self._new_paragraph() if tag == "ol": self.indent += 1 - self.bullet.append(0) + start = int(attrs["start"]) if "start" in attrs else 1 + self.bullet.append(start - 1) + self.ol_type.append(attrs.get("type", "1")) self._new_paragraph() if tag == "li": self._ln(2) - self.set_text_color(*self.ul_bullet_color) + self.set_text_color(*self.li_prefix_color) if self.bullet: bullet = self.bullet[self.indent - 1] else: @@ -653,7 +659,8 @@ def handle_starttag(self, tag, attrs): if not isinstance(bullet, str): bullet += 1 self.bullet[self.indent - 1] = bullet - bullet = f"{bullet}. " + ol_type = self.ol_type[self.indent - 1] + bullet = f"{ol_prefix(ol_type, bullet)}. " indent = "\u00a0" * self.tag_indents["li"] * self.indent self._write_paragraph(f"{indent}{bullet} ") self.set_text_color(*self.font_color) @@ -852,6 +859,8 @@ def handle_endtag(self, tag): if tag in ("ul", "ol"): self._end_paragraph() self.indent -= 1 + if tag == "ol": + self.ol_type.pop() self.bullet.pop() if tag == "table": self.table.render() @@ -964,6 +973,20 @@ def error(self, message): raise RuntimeError(message) +def ol_prefix(ol_type, index): + if ol_type == "1": + return index + if ol_type == "a": + return ascii_lowercase[index - 1] + if ol_type == "A": + return ascii_uppercase[index - 1] + if ol_type == "I": + return int2roman(index) + if ol_type == "i": + return int2roman(index).lower() + raise NotImplementedError(f"Unsupported type: {ol_type}") + + class HTMLMixin: """ [**DEPRECATED since v2.6.0**] diff --git a/fpdf/util.py b/fpdf/util.py index 9734cbd4c..5bfc109d7 100644 --- a/fpdf/util.py +++ b/fpdf/util.py @@ -111,6 +111,33 @@ def convert_unit( return to_convert / unit_conversion_factor +ROMAN_NUMERAL_MAP = ( + ("M", 1000), + ("CM", 900), + ("D", 500), + ("CD", 400), + ("C", 100), + ("XC", 90), + 
("L", 50), + ("XL", 40), + ("X", 10), + ("IX", 9), + ("V", 5), + ("IV", 4), + ("I", 1), +) + + +def int2roman(n): + "Convert an integer to Roman numeral" + result = "" + for numeral, integer in ROMAN_NUMERAL_MAP: + while n >= integer: + result += numeral + n -= integer + return result + + ################################################################################ ################### Utility functions to track memory usage #################### ################################################################################ diff --git a/test/html/html_ul_bullet_color.pdf b/test/html/html_li_prefix_color.pdf similarity index 100% rename from test/html/html_ul_bullet_color.pdf rename to test/html/html_li_prefix_color.pdf diff --git a/test/html/html_ol_start_and_type.pdf b/test/html/html_ol_start_and_type.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bf956f4405fbc198f5d3d0c0cf8b623455fd804a GIT binary patch literal 1019 zcmah|&ubGw6t03&1_Y0>dUztCX{DW=?2lw61e$CXgO#S6K(V2RZ8Aw$H#22-f~hy9 zhzQ<1wCGU;FP`+E_($~YtsYeB#iK`O^203c!P&#^esA9QzWLspbj@*>$g++IXu!Ac zqe21UGVe111mQbDJBFq%L*5p`cWFEDOMC!LO(LdkE@$(QH^_3{meI!EE(KikyEI0{ zBBHFV=&}jgwOz_qS|Z?V%n8^H#BE^xFq8x;b=BYFeF1ol(J~MFJvM@4Q1ZM^!#kQbFz@xS=?`*yYP7XF#Yb+<@XPsmEbvf z`{%{=uh(vT`Tp`)zc^F;Xns&0k1LPeHWRW-eoS314~`o_kH$+L@A=Zs3X=y2SQqd{ zz*blsOaO%I{cSO9qH3k={-~phBjJp4?VmG>zEHp}0ae+PPT*R^TXia+iREO;+H@cl zRD06i()e8-zpGmb+FXLREX`C?;z++Csosa(^NV|og+jQkW6kspLK8eXz` z@Q6yu*yCJ)x+ZeGtE|I8S)sliXclZIm$ORQ5^?kugXp$t8Ahe-$R}qwB<~jgzhmff SEc{3eZOUmjN~bGpF8T|eB^pQo literal 0 HcmV?d00001 diff --git a/test/html/test_html.py b/test/html/test_html.py index efda78936..c2fa3e8e3 100644 --- a/test/html/test_html.py +++ b/test/html/test_html.py @@ -191,9 +191,9 @@ def test_html_bold_italic_underline(tmp_path): def test_html_customize_ul(tmp_path): html = """
                    -
                  • term1: definition1
                  • -
                  • term2: definition2
                  • -
                  """ +
                1. term1: definition1
                2. +
                3. term2: definition2
                4. +
            """ # 1. Customizing through class attributes: class CustomPDF(FPDF): @@ -203,21 +203,36 @@ class CustomPDF(FPDF): pdf = CustomPDF() pdf.set_font_size(30) pdf.add_page() - pdf.write_html(html) - pdf.ln() - # 2. Customizing through instance attributes: - pdf.li_tag_indent = 10 - pdf.ul_bullet_char = "\x9b" - pdf.write_html(html) - pdf.ln() - # 3. Customizing through optional method arguments: - for indent, bullet in ((15, "\xac"), (20, "\xb7")): - pdf.write_html(html, li_tag_indent=indent, ul_bullet_char=bullet) + with pytest.warns(DeprecationWarning): # li_tag_indent + pdf.write_html(html) pdf.ln() + # 2. Customizing through instance attributes: + pdf.li_tag_indent = 10 + pdf.ul_bullet_char = "\x9b" + pdf.write_html(html) + pdf.ln() + # 3. Customizing through optional method arguments: + for indent, bullet in ((15, "\xac"), (20, "\xb7")): + pdf.write_html(html, li_tag_indent=indent, ul_bullet_char=bullet) + pdf.ln() assert_pdf_equal(pdf, HERE / "html_customize_ul.pdf", tmp_path) -def test_html_ul_bullet_color(tmp_path): +def test_html_ol_start_and_type(tmp_path): + pdf = FPDF() + pdf.set_font_size(30) + pdf.add_page() + pdf.write_html( + """
              +
            1. item
            2. +
            3. item
            4. +
            5. item
            6. +
            """ + ) + assert_pdf_equal(pdf, HERE / "html_ol_start_and_type.pdf", tmp_path) + + +def test_html_li_prefix_color(tmp_path): html = """
            • item1
            • item2
            • @@ -227,13 +242,13 @@ def test_html_ul_bullet_color(tmp_path): pdf = FPDF() pdf.set_font_size(30) pdf.add_page() - pdf.write_html(html, ul_bullet_color=0) # black + pdf.write_html(html, li_prefix_color=0) # black pdf.ln() - pdf.write_html(html, ul_bullet_color="green") + pdf.write_html(html, li_prefix_color="green") pdf.ln() - pdf.write_html(html, ul_bullet_color=DeviceRGB(r=0.5, g=1, b=0)) + pdf.write_html(html, li_prefix_color=DeviceRGB(r=0.5, g=1, b=0)) pdf.ln() - assert_pdf_equal(pdf, HERE / "html_ul_bullet_color.pdf", tmp_path) + assert_pdf_equal(pdf, HERE / "html_li_prefix_color.pdf", tmp_path) def test_html_align_paragraph(tmp_path):