From 790093a4f9a24edad590fc8873a4657a0407db15 Mon Sep 17 00:00:00 2001
From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com>
Date: Wed, 28 Feb 2024 19:13:36 +0100
Subject: [PATCH] Support for `start` & `type` attributes of <ol> tags when
 using FPDF.write_html()

---
 CHANGELOG.md                                  |   3 +-
 fpdf/fpdf.py                                  |   5 +-
 fpdf/html.py                                  |  43 +++++++++++----
 fpdf/util.py                                  |  27 ++++++++++
 ...let_color.pdf => html_li_prefix_color.pdf} | Bin
 test/html/html_ol_start_and_type.pdf          | Bin 0 -> 1019 bytes
 test/html/test_html.py                        |  51 +++++++++++-------
 7 files changed, 98 insertions(+), 31 deletions(-)
 rename test/html/{html_ul_bullet_color.pdf => html_li_prefix_color.pdf} (100%)
 create mode 100644 test/html/html_ol_start_and_type.pdf
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 429a8d425..5438ab6f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 ## [2.7.9] - Not released yet
 ### Added
 * support for overriding paragraph direction on bidirectional text
-* new optional `ul_bullet_color` parameter for `FPDF.write_html()`
+* new optional `li_prefix_color` parameter for `FPDF.write_html()`
+* support for `start` & `type` attributes of `<ol>` tags when using `FPDF.write_html()`
 * [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html) now accepts a `tag_styles` parameter to control the font, color & size of HTML elements: `<a>`, `<blockquote>`, `<li>`...
 * [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html) now accepts a `tag_indents` parameter to control, for example, the indent of `<blockquote>` elements
 ### Changed
diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
index 98711934a..ec75d94d5 100644
--- a/fpdf/fpdf.py
+++ b/fpdf/fpdf.py
@@ -405,8 +405,9 @@ def write_html(self, text, *args, **kwargs):
             li_tag_indent (int): [**DEPRECATED since v2.7.8**] numeric indentation of <li> elements - Set tag_indents instead
             dd_tag_indent (int): [**DEPRECATED since v2.7.8**] numeric indentation of <dd> elements - Set tag_indents instead
             table_line_separators (bool): enable horizontal line separators in <table>
-            ul_bullet_char (str): bullet character for <ul> elements
-            ul_bullet_color (tuple | str | drawing.Device* instance): color of the <ul> bullets
+            ul_bullet_char (str): bullet character preceding <li> items in <ul> lists.
+            li_prefix_color (tuple | str | drawing.Device* instance): color for bullets or numbers preceding <li> tags.
+                This applies to both <ul> & <ol> lists.
             heading_sizes (dict): [**DEPRECATED since v2.7.8**] font size per heading level names ("h1", "h2"...) - Set tag_styles instead
             pre_code_font (str): [**DEPRECATED since v2.7.8**] font to use for <pre> & <code> blocks - Set tag_styles instead
             warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
diff --git a/fpdf/html.py b/fpdf/html.py
index be17c635d..082e7c2e1 100644
--- a/fpdf/html.py
+++ b/fpdf/html.py
@@ -7,6 +7,7 @@
 """
 
 from html.parser import HTMLParser
+from string import ascii_lowercase, ascii_uppercase
 import logging, re, warnings
 
 from .deprecation import get_stack_level
@@ -15,6 +16,7 @@
 from .errors import FPDFException
 from .fonts import FontFace
 from .table import Table
+from .util import int2roman
 
 LOGGER = logging.getLogger(__name__)
 BULLET_WIN1252 = "\x95"  # BULLET character in Windows-1252 encoding
@@ -249,7 +251,7 @@ def __init__(
         dd_tag_indent=10,
         table_line_separators=False,
         ul_bullet_char=BULLET_WIN1252,
-        ul_bullet_color=(190, 0, 0),
+        li_prefix_color=(190, 0, 0),
         heading_sizes=None,
         pre_code_font=DEFAULT_TAG_STYLES["pre"].family,
         warn_on_tags_not_matching=True,
@@ -265,8 +267,9 @@ def __init__(
             li_tag_indent (int): [**DEPRECATED since v2.7.9**] numeric indentation of <li> elements - Set tag_indents instead
             dd_tag_indent (int): [**DEPRECATED since v2.7.9**] numeric indentation of <dd> elements - Set tag_indents instead
             table_line_separators (bool): enable horizontal line separators in <table>
-            ul_bullet_char (str): bullet character for <ul> elements
-            ul_bullet_color (tuple | str | drawing.Device* instance): color of the <ul> bullets
+            ul_bullet_char (str): bullet character preceding <li> items in <ul> lists.
+            li_prefix_color (tuple | str | drawing.Device* instance): color for bullets or numbers preceding <li> tags.
+                This applies to both <ul> & <ol> lists.
             heading_sizes (dict): [**DEPRECATED since v2.7.9**] font size per heading level names ("h1", "h2"...) - Set tag_styles instead
             pre_code_font (str): [**DEPRECATED since v2.7.9**] font to use for <pre> & <code> blocks - Set tag_styles instead
             warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
@@ -277,10 +280,10 @@ def __init__(
         self.pdf = pdf
         self.image_map = image_map or (lambda src: src)
         self.ul_bullet_char = ul_bullet_char
-        self.ul_bullet_color = (
-            color_as_decimal(ul_bullet_color)
-            if isinstance(ul_bullet_color, str)
-            else convert_to_device_color(ul_bullet_color).colors255
+        self.li_prefix_color = (
+            color_as_decimal(li_prefix_color)
+            if isinstance(li_prefix_color, str)
+            else convert_to_device_color(li_prefix_color).colors255
         )
         self.warn_on_tags_not_matching = warn_on_tags_not_matching
 
@@ -308,6 +311,7 @@ def __init__(
         self.align = ""
         self.style_stack = []  # list of FontFace
         self.indent = 0
+        self.ol_type = []  # when inside a <ol> tag, can be "a", "A", "i", "I" or "1"
         self.bullet = []
         self.font_color = pdf.text_color.colors255
         self.heading_level = None
@@ -640,11 +644,13 @@ def handle_starttag(self, tag, attrs):
             self._new_paragraph()
         if tag == "ol":
             self.indent += 1
-            self.bullet.append(0)
+            start = int(attrs["start"]) if "start" in attrs else 1
+            self.bullet.append(start - 1)
+            self.ol_type.append(attrs.get("type", "1"))
             self._new_paragraph()
         if tag == "li":
             self._ln(2)
-            self.set_text_color(*self.ul_bullet_color)
+            self.set_text_color(*self.li_prefix_color)
             if self.bullet:
                 bullet = self.bullet[self.indent - 1]
             else:
@@ -653,7 +659,8 @@ def handle_starttag(self, tag, attrs):
             if not isinstance(bullet, str):
                 bullet += 1
                 self.bullet[self.indent - 1] = bullet
-                bullet = f"{bullet}. "
+                ol_type = self.ol_type[self.indent - 1]
+                bullet = f"{ol_prefix(ol_type, bullet)}. "
             indent = "\u00a0" * self.tag_indents["li"] * self.indent
             self._write_paragraph(f"{indent}{bullet} ")
             self.set_text_color(*self.font_color)
@@ -852,6 +859,8 @@ def handle_endtag(self, tag):
         if tag in ("ul", "ol"):
             self._end_paragraph()
             self.indent -= 1
+            if tag == "ol":
+                self.ol_type.pop()
             self.bullet.pop()
         if tag == "table":
             self.table.render()
@@ -964,6 +973,39 @@ def error(self, message):
         raise RuntimeError(message)
 
 
+def ol_prefix(ol_type, index):
+    """
+    Build the marker displayed before a <li> item of an ordered list.
+
+    Args:
+        ol_type (str): value of the `type` attribute of the enclosing <ol> tag:
+            "1" (decimal), "a" / "A" (letters) or "i" / "I" (Roman numerals)
+        index (int): number of the item, taking the `start` attribute into account
+
+    Returns:
+        the item number as an `int` for decimal lists, as a `str` otherwise.
+        Letters continue past "z" with "aa", "ab"..., and indices lower than 1,
+        that have no letter or Roman numeral form, are rendered as decimal numbers.
+
+    Raises:
+        NotImplementedError: if `ol_type` is not one of the values listed above
+    """
+    if ol_type not in ("1", "a", "A", "i", "I"):
+        raise NotImplementedError(f"Unsupported type: {ol_type}")
+    if ol_type == "1" or index < 1:
+        return index
+    if ol_type in ("i", "I"):
+        roman = int2roman(index)
+        return roman if ol_type == "I" else roman.lower()
+    # Alphabetic numbering is a bijective base-26 system: 26 -> "z", 27 -> "aa"
+    letters = ascii_lowercase if ol_type == "a" else ascii_uppercase
+    prefix = ""
+    while index > 0:
+        index, rank = divmod(index - 1, 26)
+        prefix = letters[rank] + prefix
+    return prefix
+
+
 class HTMLMixin:
     """
     [**DEPRECATED since v2.6.0**]
diff --git a/fpdf/util.py b/fpdf/util.py
index 9734cbd4c..5bfc109d7 100644
--- a/fpdf/util.py
+++ b/fpdf/util.py
@@ -111,6 +111,46 @@ def convert_unit(
     return to_convert / unit_conversion_factor
 
 
+# Roman numeral symbols paired with their values, sorted from largest to smallest.
+# The subtractive forms ("CM", "IV"...) are listed so that int2roman() can proceed greedily.
+ROMAN_NUMERAL_MAP = (
+    ("M", 1000),
+    ("CM", 900),
+    ("D", 500),
+    ("CD", 400),
+    ("C", 100),
+    ("XC", 90),
+    ("L", 50),
+    ("XL", 40),
+    ("X", 10),
+    ("IX", 9),
+    ("V", 5),
+    ("IV", 4),
+    ("I", 1),
+)
+
+
+def int2roman(n):
+    """
+    Convert an integer to a Roman numeral.
+
+    Args:
+        n (int): the number to convert
+
+    Returns:
+        str: the numeral in upper case, e.g. "XIV" for 14.
+            An empty string is returned when `n` is lower than 1.
+    """
+    symbols = []
+    remainder = n
+    for numeral, value in ROMAN_NUMERAL_MAP:
+        # Greedily consume the largest value still fitting in the remainder:
+        while remainder >= value:
+            symbols.append(numeral)
+            remainder -= value
+    return "".join(symbols)
+
+
 ################################################################################
 ################### Utility functions to track memory usage ####################
 ################################################################################
diff --git a/test/html/html_ul_bullet_color.pdf b/test/html/html_li_prefix_color.pdf
similarity index 100%
rename from test/html/html_ul_bullet_color.pdf
rename to test/html/html_li_prefix_color.pdf
diff --git a/test/html/html_ol_start_and_type.pdf b/test/html/html_ol_start_and_type.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..bf956f4405fbc198f5d3d0c0cf8b623455fd804a
GIT binary patch
literal 1019
zcmah|&ubGw6t03&1_Y0>dUztCX{DW=?2lw61e$CXgO#S6K(V2RZ8Aw$H#22-f~hy9
zhzQ<1wCGU;FP`+E_($~YtsYeB#iK`O^203c!P&#^esA9QzWLspbj@*>$g++IXu!Ac
zqe21UGVe111mQbDJBFq%L*5p`cWFEDOMC!LO(LdkE@$(QH^_3{meI!EE(KikyEI0{
zBBHFV=&}jgwOz_qS|Z?V%n8^H#BE^xFq8x;b=BYFeF1ol(J~MFJvM@4Q1ZM<k+LuR
zkarVhl*FleCMXjz7^9q;AIaYdgGXcDk6JVaD@mz}C-_wAgwG8^K_jU(^aXWji?=Dl
ztCV%c4iG|ARxBdw_t4<A?*0CjJ<`pM%#R4IA3f9?nsuaWwb^QY_tNoVV+PG#J-M=(
zIcTPS-8;Cs|0a|AdHCvJZZS3EuYZ2>^!#kQbFz@xS=?`*yYP7XF#Yb+<@XPsmEbvf
z`{%{=uh(vT`Tp`)zc^F;Xns&0k1LPeHWRW-eoS314~`o_kH$+L@A=Zs3X=y2SQqd{
zz*blsOaO%I{cSO9qH3k={-~phBjJp4?VmG>zEHp}0ae+PPT*R^TXia+iREO;+H@cl
zRD06i()e8-zpGmb+FXLREX`C?;z++C<J=;)Ws|H<h^@)BN$hz|n_oyI4<gz@8sw3R
zj}};_shiM&DQ3ek1Z;}g@-%8CFk+wGm(88U3`>sosa(^NV|og+jQkW6kspLK8eXz`
z@Q6yu*yCJ)x+ZeGtE|I8S)sliXclZIm$ORQ5^?kugXp$t8Ahe-$R}qwB<~jgzhmff
SEc{3eZOUmjN~bGpF8T|eB^pQo

literal 0
HcmV?d00001

diff --git a/test/html/test_html.py b/test/html/test_html.py
index efda78936..c2fa3e8e3 100644
--- a/test/html/test_html.py
+++ b/test/html/test_html.py
@@ -191,9 +191,9 @@ def test_html_bold_italic_underline(tmp_path):
 
 def test_html_customize_ul(tmp_path):
     html = """<ul>
-            <li><b>term1</b>: definition1</li>
-            <li><b>term2</b>: definition2</li>
-        </ul>"""
+        <li><b>term1</b>: definition1</li>
+        <li><b>term2</b>: definition2</li>
+    </ul>"""
 
     # 1. Customizing through class attributes:
     class CustomPDF(FPDF):
@@ -203,21 +203,36 @@ class CustomPDF(FPDF):
     pdf = CustomPDF()
     pdf.set_font_size(30)
     pdf.add_page()
-    pdf.write_html(html)
-    pdf.ln()
-    # 2. Customizing through instance attributes:
-    pdf.li_tag_indent = 10
-    pdf.ul_bullet_char = "\x9b"
-    pdf.write_html(html)
-    pdf.ln()
-    # 3. Customizing through optional method arguments:
-    for indent, bullet in ((15, "\xac"), (20, "\xb7")):
-        pdf.write_html(html, li_tag_indent=indent, ul_bullet_char=bullet)
+    with pytest.warns(DeprecationWarning):  # li_tag_indent
+        pdf.write_html(html)
         pdf.ln()
+        # 2. Customizing through instance attributes:
+        pdf.li_tag_indent = 10
+        pdf.ul_bullet_char = "\x9b"
+        pdf.write_html(html)
+        pdf.ln()
+        # 3. Customizing through optional method arguments:
+        for indent, bullet in ((15, "\xac"), (20, "\xb7")):
+            pdf.write_html(html, li_tag_indent=indent, ul_bullet_char=bullet)
+            pdf.ln()
     assert_pdf_equal(pdf, HERE / "html_customize_ul.pdf", tmp_path)
 
 
-def test_html_ul_bullet_color(tmp_path):
+def test_html_ol_start_and_type(tmp_path):
+    pdf = FPDF()
+    pdf.set_font_size(30)
+    pdf.add_page()
+    pdf.write_html(
+        """<ol start="2" type="i">
+            <li>item</li>
+            <li>item</li>
+            <li>item</li>
+        </ol>"""
+    )
+    assert_pdf_equal(pdf, HERE / "html_ol_start_and_type.pdf", tmp_path)
+
+
+def test_html_li_prefix_color(tmp_path):
     html = """<ul>
         <li>item1</li>
         <li>item2</li>
@@ -227,13 +242,13 @@ def test_html_ul_bullet_color(tmp_path):
     pdf = FPDF()
     pdf.set_font_size(30)
     pdf.add_page()
-    pdf.write_html(html, ul_bullet_color=0)  # black
+    pdf.write_html(html, li_prefix_color=0)  # black
     pdf.ln()
-    pdf.write_html(html, ul_bullet_color="green")
+    pdf.write_html(html, li_prefix_color="green")
     pdf.ln()
-    pdf.write_html(html, ul_bullet_color=DeviceRGB(r=0.5, g=1, b=0))
+    pdf.write_html(html, li_prefix_color=DeviceRGB(r=0.5, g=1, b=0))
     pdf.ln()
-    assert_pdf_equal(pdf, HERE / "html_ul_bullet_color.pdf", tmp_path)
+    assert_pdf_equal(pdf, HERE / "html_li_prefix_color.pdf", tmp_path)
 
 
 def test_html_align_paragraph(tmp_path):