From 430ff28cb3d3e8f05709b87e209de87fd5445d09 Mon Sep 17 00:00:00 2001 From: oleksii-shyman Date: Fri, 4 Feb 2022 03:13:28 -0800 Subject: [PATCH] feat :: add soft-hyphen for multi-cell (#308) --- CHANGELOG.md | 3 + fpdf/fpdf.py | 389 ++++---- fpdf/line_break.py | 254 ++++++ test/cells/cell_markdown_right_aligned.pdf | Bin 10863 -> 10863 bytes ...ioning_and_page_breaking_for_multicell.pdf | Bin 2032 -> 2020 bytes test/cells/multi_cell_markdown.pdf | Bin 1329 -> 1327 bytes .../multi_cell_markdown_with_ttf_fonts.pdf | Bin 21217 -> 21211 bytes test/test_line_break.py | 830 ++++++++++++++++++ test/test_markdown_parse.py | 33 +- 9 files changed, 1301 insertions(+), 208 deletions(-) create mode 100644 fpdf/line_break.py create mode 100644 test/test_line_break.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27911053a..c7b6d81ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,13 +11,16 @@ and [PEP 440](https://www.python.org/dev/peps/pep-0440/). ### Added - documentation on combining `borb` & `fpdf2`: [Creating a borb.pdf.document.Document from a FPDF instance](https://pyfpdf.github.io/fpdf2/ExistingPDFs.html) - new documentation page on [Emojis, Symbols & Dingbats](https://pyfpdf.github.io/fpdf2/EmojisSymbolsDingbats.html) +- support for soft-hyphen break in `multi_cell` calls ### Changed - log level of `_substitute_page_number()` has been lowered from `INFO` to `DEBUG` +- `multi_cell` logic of splitting text into multiple lines was reworked and moved into separate module ### Fixed - a bug in `get_string_width()` with unicode fonts and Markdown enabled, resulting in calls to `cell()` / `multi_cell()` with `align="R"` to display nothing - thanks @mcerveny for the fix! 
+- a bug with incorrect width calculation of markdown text ## [2.5.0] - 2022-01-22 ### Added diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 8722dc3f1..073456c1f 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -40,6 +40,7 @@ from .errors import FPDFException, FPDFPageFormatException from .fonts import fpdf_charwidths from .image_parsing import get_img_info, load_image, SUPPORTED_IMAGE_FILTERS +from .line_break import Fragment, MultiLineBreak from .outline import serialize_outline, OutlineSection from . import drawing from .recorder import FPDFRecorder @@ -862,21 +863,33 @@ def get_string_width(self, s, normalized=False, markdown=False): # normalized is parameter for internal use s = s if normalized else self.normalize_text(s) w = 0 - for txt_frag, style, _ in ( + for frag in ( self._markdown_parse(s) if markdown - else ((s, self.font_style, bool(self.underline)),) + else (Fragment.from_string(s, self.font_style, bool(self.underline)),) ): - font = self.fonts[self.font_family + style] - if self.unifontsubset: - for char in txt_frag: - w += _char_width(font, ord(char)) - else: - w += sum(_char_width(font, char) for char in txt_frag) + w += self.get_normalized_string_width_with_style(frag.string, frag.style) if self.font_stretching != 100: w *= self.font_stretching / 100 return w * self.font_size / 1000 + def get_normalized_string_width_with_style(self, string, style): + """ + Returns the length of a string with given style + + Args: + string (str): the string whose length is to be computed. + style (str) : the string representing the style + """ + w = 0 + font = self.fonts[self.font_family + style] + if self.unifontsubset: + for char in string: + w += _char_width(font, ord(char)) + else: + w += sum(_char_width(font, char) for char in string) + return w + def set_line_width(self, width): """ Defines the line width of all stroking operations (lines, rectangles and cell borders). 
@@ -2018,12 +2031,86 @@ def cell( # Font styles preloading must be performed before any call to FPDF.get_string_width: txt = self.normalize_text(txt) styled_txt_frags = self._preload_font_styles(txt, markdown) + return self._render_styled_cell_text( + w, + h, + styled_txt_frags, + border, + ln, + align, + fill, + link, + center, + ) + + def _render_styled_cell_text( + self, + w=None, + h=None, + styled_txt_frags=(), + border=0, + ln=0, + align="", + fill=False, + link="", + center=False, + ): + """ + Prints a cell (rectangular area) with optional borders, background color and + character string. The upper-left corner of the cell corresponds to the current + position. The text can be aligned or centered. After the call, the current + position moves to the right or to the next line. It is possible to put a link + on the text. + + If automatic page breaking is enabled and the cell goes beyond the limit, a + page break is performed before outputting. + + Args: + w (int): Cell width. Default value: None, meaning to fit text width. + If 0, the cell extends up to the right margin. + h (int): Cell height. Default value: None, meaning an height equal + to the current font size. + styled_txt_frags (tuple): Tuple of fragments to render. + Default value: empty tuple. + border: Indicates if borders must be drawn around the cell. + The value can be either a number (`0`: no border ; `1`: frame) + or a string containing some or all of the following characters + (in any order): + `L`: left ; `T`: top ; `R`: right ; `B`: bottom. Default value: 0. + ln (int): Indicates where the current position should go after the call. + Possible values are: `0`: to the right ; `1`: to the beginning of the + next line ; `2`: below. Putting 1 is equivalent to putting 0 and calling + `ln` just after. Default value: 0. + align (str): Allows to center or align the text inside the cell. 
+ Possible values are: `L` or empty string: left align (default value) ; + `C`: center ; `R`: right align + fill (bool): Indicates if the cell background must be painted (`True`) + or transparent (`False`). Default value: False. + link (str): optional link to add on the cell, internal + (identifier returned by `add_link`) or external URL. + center (bool): center the cell horizontally in the page + markdown (bool): enable minimal markdown-like markup to render part + of text as bold / italics / underlined. Default to False. + + Returns: a boolean indicating if page break was triggered + """ + if not self.font_family: + raise FPDFException("No font set, you need to call set_font() beforehand") + if isinstance(border, int) and border not in (0, 1): + warnings.warn( + 'Integer values for "border" parameter other than 1 are currently ' + "ignored" + ) + border = 1 + styled_txt_width = 0 + for styled_txt_frag in styled_txt_frags: + styled_txt_width += self.get_string_width(styled_txt_frag.string) if w == 0: w = self.w - self.r_margin - self.x elif w is None: - if not txt: + if not styled_txt_frags: raise ValueError("A 'txt' parameter must be provided if 'w' is None") - w = self.get_string_width(txt, True, markdown) + 2 + w = styled_txt_width + 2 if h is None: h = self.font_size # pylint: disable=invalid-unary-operand-type @@ -2068,11 +2155,11 @@ def cell( f"{(x + w) * k:.2f} {(self.h - (y + h)) * k:.2f} l S " ) - if txt: + if styled_txt_frags: if align == "R": - dx = w - self.c_margin - self.get_string_width(txt, True, markdown) + dx = w - self.c_margin - styled_txt_width elif align == "C": - dx = (w - self.get_string_width(txt, True, markdown)) / 2 + dx = (w - styled_txt_width) / 2 else: dx = self.c_margin @@ -2090,7 +2177,10 @@ def cell( if self.ws and self.unifontsubset: space = escape_parens(" ".encode("utf-16-be").decode("latin-1")) s += " 0 Tw" - for txt_frag, style, underline in styled_txt_frags: + for frag in styled_txt_frags: + txt_frag = frag.string + style = 
frag.style + underline = frag.underline if self.font_style != style: self.font_style = style self.current_font = self.fonts[ @@ -2122,7 +2212,10 @@ def cell( s_width += self.get_string_width(txt_frag, True) s += "] TJ" else: - for txt_frag, style, underline in styled_txt_frags: + for frag in styled_txt_frags: + txt_frag = frag.string + style = frag.style + underline = frag.underline if self.font_style != style: self.font_style = style self.current_font = self.fonts[ @@ -2168,7 +2261,7 @@ def cell( self.link( self.x + dx, self.y + (0.5 * h) - (0.5 * self.font_size), - self.get_string_width(txt, True, markdown), + styled_txt_width, self.font_size, link, ) @@ -2194,18 +2287,22 @@ def _preload_font_styles(self, txt, markdown): so we return the resulting `styled_txt_frags` tuple to avoid repeating this processing later on. """ - if not txt or not markdown: - return tuple([[txt, self.font_style, bool(self.underline)]]) + if not txt: + return tuple() + if not markdown: + return tuple( + [Fragment.from_string(txt, self.font_style, bool(self.underline))] + ) prev_font_style = self.font_style styled_txt_frags = tuple(self._markdown_parse(txt)) page = self.page # We set the current to page to zero so that # set_font() does not produce any text object on the stream buffer: self.page = 0 - if any("B" in style for _, style, _ in styled_txt_frags): + if any("B" in frag.style for frag in styled_txt_frags): # Ensuring bold font is supported: self.set_font(style="B") - if any("I" in style for _, style, _ in styled_txt_frags): + if any("I" in frag.style for frag in styled_txt_frags): # Ensuring italics font is supported: self.set_font(style="I") # Restoring initial style: @@ -2216,7 +2313,7 @@ def _preload_font_styles(self, txt, markdown): def _markdown_parse(self, txt): "Split some text into fragments based on styling: **bold**, __italics__, --underlined--" txt_frag, in_bold, in_italics, in_underline = ( - "", + [], "B" in self.font_style, "I" in self.font_style, 
bool(self.underline), @@ -2235,10 +2332,10 @@ def _markdown_parse(self, txt): and (len(txt) < 3 or txt[2] != half_marker) ): if txt_frag: - yield ( - txt_frag, + yield Fragment( ("B" if in_bold else "") + ("I" if in_italics else ""), in_underline, + txt_frag, ) if txt[:2] == self.MARKDOWN_BOLD_MARKER: in_bold = not in_bold @@ -2246,16 +2343,16 @@ def _markdown_parse(self, txt): in_italics = not in_italics if txt[:2] == self.MARKDOWN_UNDERLINE_MARKER: in_underline = not in_underline - txt_frag = "" + txt_frag = [] txt = txt[2:] else: - txt_frag += txt[0] + txt_frag.append(txt[0]) txt = txt[1:] if txt_frag: - yield ( - txt_frag, + yield Fragment( ("B" if in_bold else "") + ("I" if in_italics else ""), in_underline, + txt_frag, ) def will_page_break(self, height): @@ -2379,183 +2476,79 @@ def multi_cell( w = self.w - self.r_margin - self.x if h is None: h = self.font_size - wmax = (w - 2 * self.c_margin) * 1000 / self.font_size + maximum_allowed_width = (w - 2 * self.c_margin) * 1000 / self.font_size # Calculate text length txt = self.normalize_text(txt) - s = txt.replace("\r", "") - normalized_string_length = len(s) - if normalized_string_length > 0 and s[-1] == "\n": - normalized_string_length -= 1 + normalized_string = txt.replace("\r", "") + styled_text_fragments = self._preload_font_styles(normalized_string, markdown) prev_font_style, prev_underline = self.font_style, self.underline if markdown and not split_only: self._markdown_leak_end_style = True - - b = 0 - if border: - if border == 1: - border = "LTRB" - b = "LRT" - b2 = "LR" - else: - b2 = "" - if "L" in border: - b2 += "L" - if "R" in border: - b2 += "R" - b = b2 + "T" if "T" in border else b2 - text_cells = [] - sep = -1 - i = 0 - j = 0 - l = 0 - whitespace_count = 0 - nl = 1 prev_x, prev_y = self.x, self.y - while i < normalized_string_length: - # Get next character - c = s[i] - # Explicit line break - if c == "\n": - if self.ws > 0: - self.ws = 0 - self._out("0 Tw") + if not border: + border = "" + 
elif border == 1: + border = "LTRB" - if max_line_height and h > max_line_height: - height = max_line_height - h -= height - else: - height = h - new_page = self.cell( - w, - h=height, - txt=substr(s, j, i - j), - border=b, - ln=2, - align=align, - fill=fill, - link=link, - markdown=markdown, - ) - page_break_triggered = page_break_triggered or new_page - text_cells.append(substr(s, j, i - j)) + text_lines = [] + multi_line_break = MultiLineBreak( + styled_text_fragments, + self.get_normalized_string_width_with_style, + justify=(align == "J"), + ) + text_line = multi_line_break.get_line_of_given_width(maximum_allowed_width) + while (text_line) is not None: + text_lines.append(text_line) + text_line = multi_line_break.get_line_of_given_width(maximum_allowed_width) - i += 1 - sep = -1 - j = i - l = 0 - whitespace_count = 0 - nl += 1 - if border and nl == 2: - b = b2 - continue + for text_line_index, text_line in enumerate(text_lines): + is_last_line = text_line_index == len(text_lines) - 1 - if c == " ": - sep = i - chars_total_width = l - whitespace_count += 1 - if self.unifontsubset: - l += self.get_string_width(c, True) / self.font_size * 1000 + if max_line_height is not None and h > max_line_height and not is_last_line: + current_cell_height = max_line_height + h -= current_cell_height else: - l += _char_width(self.current_font, c) - - # Automatic line break - if l > wmax: - if sep == -1: - if i == j: - i += 1 - if self.ws > 0: - self.ws = 0 - self._out("0 Tw") - - if max_line_height and h > max_line_height: - height = max_line_height - h -= height - else: - height = h - new_page = self.cell( - w, - h=height, - txt=substr(s, j, i - j), - border=b, - ln=2, - align=align, - fill=fill, - link=link, - markdown=markdown, - ) - page_break_triggered = page_break_triggered or new_page - text_cells.append(substr(s, j, i - j)) - - else: - if align == "J": - self.ws = ( - (wmax - chars_total_width) - / 1000 - * self.font_size - / (whitespace_count - 1) - if whitespace_count 
> 1 - else 0 - ) - self._out(f"{self.ws * self.k:.3f} Tw") - - if max_line_height and h > max_line_height: - height = max_line_height - h -= height - else: - height = h - new_page = self.cell( - w, - h=height, - txt=substr(s, j, sep - j), - border=b, - ln=2, - align=align, - fill=fill, - link=link, - markdown=markdown, + current_cell_height = h + + word_spacing = 0 + if text_line.justify: + word_spacing = ( + (maximum_allowed_width - text_line.text_width) + / 1000 + * self.font_size + / text_line.number_of_spaces_between_words + ) + self._out(f"{word_spacing * self.k:.3f} Tw") + elif self.ws > 0: + self._out("0 Tw") + self.ws = word_spacing + + new_page = self._render_styled_cell_text( + w, + h=current_cell_height, + styled_txt_frags=text_line.fragments, + border="".join( + ( + "T" if "T" in border and text_line_index == 0 else "", + "L" if "L" in border else "", + "R" if "R" in border else "", + "B" if "B" in border and is_last_line else "", ) - page_break_triggered = page_break_triggered or new_page - text_cells.append(substr(s, j, sep - j)) - - i = sep + 1 - sep = -1 - j = i - l = 0 - whitespace_count = 0 - nl += 1 - if border and nl == 2: - b = b2 - else: - i += 1 - - # Last chunk - if self.ws > 0: - self.ws = 0 - self._out("0 Tw") - if border and "B" in border: - b += "B" - - new_page = self.cell( - w, - h=h, - txt=substr(s, j, i - j), - border=b, - ln=0 if ln == 3 else ln, - align=align, - fill=fill, - link=link, - markdown=markdown, - ) - if new_page: - # When a page jump is performed and ln=3, - # we stick to that new vertical offset. - # cf. test_multi_cell_table_with_automatic_page_break - prev_y = self.y - page_break_triggered = page_break_triggered or new_page - text_cells.append(substr(s, j, i - j)) + ), + ln=(2 if not is_last_line else (0 if ln == 3 else ln)), + align=align, + fill=fill, + link=link, + ) + if is_last_line and new_page and ln == 3: + # When a page jump is performed and ln=3, + # we stick to that new vertical offset. + # cf. 
test_multi_cell_table_with_automatic_page_break + prev_y = self.y + page_break_triggered = page_break_triggered or new_page new_x, new_y = { 0: (self.x, self.y + h), @@ -2573,12 +2566,20 @@ def multi_cell( _perform_page_break_if_need_be, ) self.set_xy(*location) # restore location - return text_cells + result = [] + for text_line in text_lines: + characters = [] + for frag in text_line.fragments: + characters.extend(frag.characters) + result.append("".join(characters)) + return result if markdown: if self.font_style != prev_font_style: self.font_style = prev_font_style self.current_font = self.fonts[self.font_family + self.font_style] - s += f" /F{self.current_font['i']} {self.font_size_pt:.2f} Tf" + normalized_string += ( + f" /F{self.current_font['i']} {self.font_size_pt:.2f} Tf" + ) self.underline = prev_underline self._markdown_leak_end_style = False @@ -4099,8 +4100,8 @@ def _char_width(font, char): cw = font["cw"] try: width = cw[char] - except IndexError: - width = font["desc"].get("MissingWidth") or 500 + except (IndexError, KeyError): + width = font.get("desc", {}).get("MissingWidth") or 500 if width == 65535: width = 0 return width diff --git a/fpdf/line_break.py b/fpdf/line_break.py new file mode 100644 index 000000000..dd5b30c1d --- /dev/null +++ b/fpdf/line_break.py @@ -0,0 +1,254 @@ +from collections import namedtuple + +SOFT_HYPHEN = "\u00ad" +HYPHEN = "\u002d" +SPACE = " " +NEWLINE = "\n" + + +class Fragment: + def __init__(self, style, underlined, characters=None): + self.characters = [] if characters is None else characters + self.style = style + self.underline = underlined + + @classmethod + def from_string(cls, string, style, underlined): + return cls(style, underlined, list(string)) + + def trim(self, index): + self.characters = self.characters[:index] + + @property + def string(self): + return "".join(self.characters) + + def __eq__(self, other): + return ( + self.characters == other.characters + and self.style == other.style + and 
self.underline == other.underline + ) + + +TextLine = namedtuple( + "TextLine", + ("fragments", "text_width", "number_of_spaces_between_words", "justify"), +) + +SpaceHint = namedtuple( + "SpaceHint", + ( + "original_fragment_index", + "original_character_index", + "current_line_fragment_index", + "current_line_character_index", + "width", + "number_of_spaces", + ), +) + +HyphenHint = namedtuple( + "HyphenHint", + SpaceHint._fields + + ( + "character_to_append", + "character_to_append_width", + "character_to_append_style", + "character_to_append_underline", + ), +) + + +class CurrentLine: + def __init__(self): + self.fragments = [] + self.width = 0 + self.number_of_spaces = 0 + + # automatic break hints + # CurrentLine class remembers 3 positions + # 1 - position of last inserted character. + # class attributes (`width`, `fragments`) + # is used for this purpose + # 2 - position of last inserted space + # SpaceHint is used fo this purpose. + # 3 - position of last inserted soft-hyphen + # HyphenHint is used fo this purpose. + # The purpose of multiple positions tracking - to have an ability + # to break in multiple places, depending on condition. + self.space_break_hint = None + self.hyphen_break_hint = None + + def add_character( + self, + character, + character_width, + style, + underline, + original_fragment_index, + original_character_index, + ): + assert character != NEWLINE + + if not self.fragments: + self.fragments.append(Fragment(style, underline)) + + # characters are expected to be grouped into fragments by styles and + # underline attributes. 
If the last existing fragment doesn't match + # the (style, underline) of pending character -> + # create a new fragment with matching (style, underline) + elif ( + style != self.fragments[-1].style + or underline != self.fragments[-1].underline + ): + self.fragments.append(Fragment(style, underline)) + active_fragment = self.fragments[-1] + + if character == SPACE: + self.space_break_hint = SpaceHint( + original_fragment_index, + original_character_index, + len(self.fragments), + len(active_fragment.characters), + self.width, + self.number_of_spaces, + ) + self.number_of_spaces += 1 + elif character == SOFT_HYPHEN: + self.hyphen_break_hint = HyphenHint( + original_fragment_index, + original_character_index, + len(self.fragments), + len(active_fragment.characters), + self.width, + self.number_of_spaces, + HYPHEN, + character_width, + style, + underline, + ) + + if character != SOFT_HYPHEN: + self.width += character_width + active_fragment.characters.append(character) + + def _apply_automatic_hint(self, break_hint): + """ + This function mutates the current_line, applying one of the states + observed in the past and stored in + `hyphen_break_hint` or `space_break_hint` attributes. 
+ """ + self.fragments = self.fragments[: break_hint.current_line_fragment_index] + if self.fragments: + self.fragments[-1].trim(break_hint.current_line_character_index) + self.number_of_spaces = break_hint.number_of_spaces + self.width = break_hint.width + + def manual_break(self, justify=False): + return TextLine( + fragments=self.fragments, + text_width=self.width, + number_of_spaces_between_words=self.number_of_spaces, + justify=(self.number_of_spaces > 0) and justify, + ) + + def automatic_break_possible(self): + return self.hyphen_break_hint is not None or self.space_break_hint is not None + + def automatic_break(self, justify): + assert self.automatic_break_possible() + if self.hyphen_break_hint is not None and ( + self.space_break_hint is None + or self.hyphen_break_hint.width > self.space_break_hint.width + ): + self._apply_automatic_hint(self.hyphen_break_hint) + self.add_character( + self.hyphen_break_hint.character_to_append, + self.hyphen_break_hint.character_to_append_width, + self.hyphen_break_hint.character_to_append_style, + self.hyphen_break_hint.character_to_append_underline, + self.hyphen_break_hint.original_fragment_index, + self.hyphen_break_hint.original_character_index, + ) + return ( + self.hyphen_break_hint.original_fragment_index, + self.hyphen_break_hint.original_character_index, + self.manual_break(justify), + ) + self._apply_automatic_hint(self.space_break_hint) + return ( + self.space_break_hint.original_fragment_index, + self.space_break_hint.original_character_index, + self.manual_break(justify), + ) + + +class MultiLineBreak: + def __init__(self, styled_text_fragments, size_by_style, justify=False): + + self.styled_text_fragments = styled_text_fragments + + self.size_by_style = size_by_style + self.justify = justify + + self.fragment_index = 0 + self.character_index = 0 + + def _get_character_width(self, character, style=""): + if character == SOFT_HYPHEN: + # HYPHEN is inserted instead of SOFT_HYPHEN + character = HYPHEN + return 
self.size_by_style(character, style) + + def get_line_of_given_width(self, maximum_width): + + if self.fragment_index == len(self.styled_text_fragments): + return None + + current_line = CurrentLine() + while self.fragment_index < len(self.styled_text_fragments): + + current_fragment = self.styled_text_fragments[self.fragment_index] + + if self.character_index >= len(current_fragment.characters): + self.character_index = 0 + self.fragment_index += 1 + continue + + character = current_fragment.characters[self.character_index] + character_width = self._get_character_width( + character, current_fragment.style + ) + + if character == NEWLINE: + self.character_index += 1 + return current_line.manual_break() + + if current_line.width + character_width > maximum_width: + if character == SPACE: + self.character_index += 1 + return current_line.manual_break(self.justify) + if current_line.automatic_break_possible(): + ( + self.fragment_index, + self.character_index, + line, + ) = current_line.automatic_break(self.justify) + self.character_index += 1 + return line + return current_line.manual_break() + + current_line.add_character( + character, + character_width, + current_fragment.style, + current_fragment.underline, + self.fragment_index, + self.character_index, + ) + + self.character_index += 1 + + if current_line.width: + return current_line.manual_break() diff --git a/test/cells/cell_markdown_right_aligned.pdf b/test/cells/cell_markdown_right_aligned.pdf index 593ba3f554e3a9ba553c42af40896fe3f9c99e34..be2e100598a86e0969611b033526228b94add914 100644 GIT binary patch delta 22 ecmaDK@;+q3I_B4s(i1mou)IDH^mgOjNtytJv5|DF7X8)%M6<5W#lg>TF#z zH5wmT5;}=PG3p&>Mwh^1uCEH8XQdxdOyRGd z(eV47sNNTmpDm(q0-qT;SJcZNVdL?gx4!b9?fjPxUK8fZB~;m-(9F=Xx9I6e5!f7P za9O=wbJpj~M)~K$s-EUMZk^d7RG?q5PPjhq%5yXpE6CY*TIC{<9*VckAT(z*Zk; z^|>;u`p7*8j>$FKC4+z=W^kxyU9Z=>|DL%?KAe>z5&Ob9=l$5m-mTX zwI}~EE!@94Yw}X2j(Ss5l$0{%eBK=efn)FMx-7S~2mF(c*IwGbV*4-FMW&(Vr*3Jl 
zU;E`_vv?chmQ#gi%Nz2DV`fB^4f0IO8k8k|1DA({k$HTlh_OHGD8SsbwcqO zry6CBC#>3~Lf-toa+{d4g9N;1GJc*QmsH%jvfJX0Y3B*BDJ_S?T;61NckoIoI=j8L zNpW4p&8nD^Z^-|3PFD1xmI>Di!Ro%-1c^uovM~q$Ii1D6@ay9FktPDohZz3wJ>PNQ zXVvZ7;VN_Nj{V*`aiYatcDMQG#JqlPW@r7uC~jnFtY82F3V8}#V1|LAiG|^0F?Q=f o19TY!b2AHcF+*buQw%XvEMjKHmXjB;yKj4n4sq!PemJKwc|mb~*i44Qf_62#dwxDu z=Ba;Yx_McWv0>&^6>e73Jdq<^d&BOYvWm_BEVp`@(CM$IxWzaP(pWdw?B5d=;eS~! zwstr5`t&T_@IBe2!#R-v46 zg0NA0_Kb$G8(J|pf}RM6)fe<$YJYjCTG?#D{9`q;pWoLkYZTdReOLGM^w@y#eZ?74 zCl%5bCoJ>ltDSoMGK>AW<@I}21$%VWRFlu-COZcbnd8q2piK)+)aiS{`^} zPIcEZ;k6g*kDvbBnl^We@7pyiC5}C9I3hcz^Y*l(!eIrxc_&)s)<)D+oIbv{KJ-e? zI}i0Qlh>TL2{W;oe4+63mYYt#3N5R6HZK>bg|Mnc0%pRE~+fc5hS!s+(Z>UBp-U z=u<{<&QD@%Ira3M8~pEStY3Iz^-iW#pDG{9rb2LD9L5Y?z}q&0Xr*$8|>|$Ogh}Yp~&*d+-i?=Q!W1FY6R-mnAKPP_YV-uN)-G3_3^!zr3Ym{$~sdn8vNu;&izFu6X1Tt-e#eD5g+4zMr&xp}%oSzr)1LEZ|4SRWOHTQVPKN#9 zoN5wsWLH4SEEA^324}aI-fUA&$T{!uJgQ$`bHlkVCXM&i->>gkE>Ss2Xx(igiCe-K zq%TNybVZ1FF?nb{Q>}4-*~F?{mNxB8S)eD^n$1d^TV5GxeN48kI(uVYKvs(7o4PHX7n3{NQ13*k!z8BHYgSatX?8Vpv`O`&x1BnfO2`X`SY~3v4^Kb7kPICLzSf9Vd;@t1Xi}e-T!n`;CX8FM=Zfszx zU;qLNc?w)$hJm4prSW7gcI!Y483S{3V+_^CmgX2@rdY(xObsVbV|V2=HQ`cKb@g}S F0s#1A4z~aR diff --git a/test/cells/multi_cell_markdown.pdf b/test/cells/multi_cell_markdown.pdf index ee99521bc08be87bf0d9833579c8e55299dabff4..53983f15b3dd9305d8df23bf91f9d2cfe3e8a3a0 100644 GIT binary patch delta 390 zcmdnUwVrE2L%pG$9anKlQEFl?SH+yI6L$L@F%WS5U2}Abqth||Iquyotc$+z&!~uy zJX+R~`|)=Kx1X6%iMMU|^D}#8=1F^{@XtTn+_YPrtx4F+aqq#!SrV$uQx}|bKgc1y zK)%Z8UbE4KeJ!H0H&oNu)y^cczBRaeh7r z&*1g-OX3E#<*o(4o|Lv;X5KD+^IFO_`N+w$ZY%%svd(j{{P((M_wwD>GOUCCY`rdi z{L+@yT&Z~}P%o!$ZeTpfBxYfuU;qLNc?w)$hJk^(nZ@LA7VAJ03>i~%BMdP!GfNCH a3rkFOmL_JCAG5e}npkqFs=E5SaRC6keUIG$ delta 376 zcmZ3_wUKK=L%p$`9anKlQEFl?SH+yQ7Y^pND2T9L*c)Fs-7Hh0(Xr(AlFb>rp0KaD zz}edo>QTS{NYCRLB^O&?f4BHvD0#1|i=%V?BiDx|3m^1^h+a{T3spR^GjHKE$>l9( z6M9wNN}gI!!$0|J!Q(kLJ}(Zlyv?mSCVVA)+6}d-u@Y&wU8OJAZ}BlctC6|n&TXeP z+syJxil4W=_O0Hry5()`xtVKE?%bev)^5h`(;TTu?~Uq;{l))1aCv>aW~;PS`W#Q^ 
zKib-^Uw$dqi<+&IyxO-ude^+YR}vn6ExmAI?{B|ZYx=j(= z|M1}bJ*t}|xl;2|AfDb_!+4lU%+f%?00b2B6u7_)0|Rq&gUO*R){JJ8C$b3JnqbJA enHyq=Sy*DKvox`o{E)?!)6|elRn^tsjSB$x>7361 diff --git a/test/cells/multi_cell_markdown_with_ttf_fonts.pdf b/test/cells/multi_cell_markdown_with_ttf_fonts.pdf index d13d64b6d72b78c6c1be1313a3e7e8921144fefb..ba9d7e3ba2369293f88fc435bc2f3abcfe6f3a01 100644 GIT binary patch delta 865 zcmaF3l=1db#tHRI=B5)H2A<@>P#OnLcPW@=|Nwu;ri^4to{)TNA z>bFe4^~kcm?8g_a0MBEVyYg!acV0PjMJTpn1&grhty`OxIel+cv3eZMD0wDuV!~w2r8yep(QCn`g+u(IxOz0!_UCNE8IBpruVRB`w_dfqf zHhS&(K;z3F>iE{OTiV3?u~?ox`J(ER%-Y!=cWq*mt4g+=-LT+$_=kVhod#M_6R)-@ zq)+I7ZB_hL|J5HML$;7JW~=7=uX5aHs(R}}skEJ4yxA=Oj#-(hOP)-fD(ZN0TCMlP zW%J%A@Xr@Nqe=c%{965`*KShzk9aDhD*Kg)~uVR$0PPK!Dp6e-XykCjjWDKS5r6=Lf8wo zie6{<^X#Q|WB~(bz1r(VyHm8ClS`9j>y?kMoHco&qVQ&ErW`*}BLxExP{>o@0yB&Z zj4Vti&k3-qH?%;LGB7YVGe#FPG%&No5VJJERA*^!h_24a&=SKABNGdVdG!c48JL?G znWAepH#IlF5VJ5fLl?6!Gs3Xg!psswuZ5)%ida1;IDlg40c2=wVTPg*sLap=7&d4Y z8=4qdV2GI-7@44}Gc~Y84=qDeVTS FTmbXZ77zdc delta 888 zcmcb;l=0zG#tHRI7KRfW9wOV`a&$luT>zg1|JQ+(yd=i%`u776bYop6w4nL%@9 zXMb4OA*+>h&98Ypk7bNVV&slE#C2`)tU1qf9iKmJ zZoF8;ryuk5x8WMuBODc$%uj689=|qBFiF!gO|~!!5~y6+tMqc~#eF9Y9y4-R{O=WX z3*k14HhjwRNigyC`v0wS=Q)=NF1-+Xwcn#kpz!3vu$tK8@f|Z_dsld9yzN->F{Ogh zh4;^_$Gx@p4~W^FZ%N*Cv)3?I+U@w6n{VsqHRnv8{9Rn3MOBV-REy5^{3KleP?*L#fDAQx1(a;#LBF>bZ|zAww;P-R^S(brE5%HlFq zFxV{4l;tOCpMYI8 z(bX9l8DQ99WMT<1uO8th19KB&po3A|3AEPS3`5Mq%m`f!D1>3Lg_$LWUJFYj6tQ}c zb(Tg3=mBJCY+;6?5U9-1#K6!P&0<3nBTEc1Qv+iIbakc%mgu2nXliU^i(#>;8HQtk a0ceQsJD`}k(d4cmPYyFKRaIAiH!c9r>KJVR diff --git a/test/test_line_break.py b/test/test_line_break.py new file mode 100644 index 000000000..70ce1ed1a --- /dev/null +++ b/test/test_line_break.py @@ -0,0 +1,830 @@ +from fpdf.line_break import Fragment, MultiLineBreak, TextLine + + +def test_no_fragments(): + """ + There is no text provided to break into multiple lines + expected behavior -> + - call to `get_line_of_given_width` always returns None + """ + alphabet = { + "normal": {}, + } + 
multi_line_break = MultiLineBreak([], lambda a, b: alphabet[b][a]) + assert multi_line_break.get_line_of_given_width(100000) is None + assert multi_line_break.get_line_of_given_width(1) is None + + +def test_width_calculation(): + """ + Every character has different width + """ + text = "abcd" + alphabet = { + "normal": {}, + } + for width, char in enumerate(text): + alphabet["normal"][char] = width + 2 + fragments = [ + Fragment.from_string(text, "normal", False), + ] + multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a]) + + # zero width returns empty line + assert multi_line_break.get_line_of_given_width(0) == TextLine( + fragments=[], text_width=0, number_of_spaces_between_words=0, justify=False + ) + # the first character has width of 2 units. request of 1 unit line returns + # an empty line + assert multi_line_break.get_line_of_given_width(1) == TextLine( + fragments=[], text_width=0, number_of_spaces_between_words=0, justify=False + ) + # get other characters one by one + assert multi_line_break.get_line_of_given_width(2) == TextLine( + fragments=[Fragment.from_string("a", "normal", False)], + text_width=2, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(3) == TextLine( + fragments=[Fragment.from_string("b", "normal", False)], + text_width=3, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(4) == TextLine( + fragments=[Fragment.from_string("c", "normal", False)], + text_width=4, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(5) == TextLine( + fragments=[Fragment.from_string("d", "normal", False)], + text_width=5, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(100000) is None + + +def test_single_space_in_fragment(): + """ + there is only one space character in the input text. 
+ expected behavior -> + - first call to `get_line_of_given_width` contains space. + - second call to `get_line_of_given_width` is None because there is no + text left. + """ + text = " " + fragments = [ + Fragment.from_string(text, "normal", False), + ] + alphabet = { + "normal": {}, + } + for char in text: + alphabet["normal"][char] = 500 + multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a]) + assert multi_line_break.get_line_of_given_width(5000) == TextLine( + fragments=fragments, + text_width=500, + number_of_spaces_between_words=1, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(100000) is None + + +def test_single_soft_hyphen_in_fragment(): + """ + there is only one soft hyphen character in the input text. + expected behavior -> + - call to `get_line_of_given_width` always returns None, because soft + hyphen doesn't break a word + """ + alphabet = { + "normal": {"\u002d": 500}, + } + text = "\u00ad" + fragments = [ + Fragment.from_string(text, "normal", False), + ] + for char in text: + alphabet["normal"][char] = 500 + multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a]) + assert multi_line_break.get_line_of_given_width(100000) is None + + +def test_single_hard_hyphen_in_fragment(): + """ + there is only one hard hyphen character in the input text. + expected behavior -> + - first call to `get_line_of_given_width` contains hard hyphen. 
+ - second call to `get_line_of_given_width` is None because there is no + """ + alphabet = { + "normal": {"\u002d": 500}, + } + text = "\u002d" + fragments = [ + Fragment.from_string(text, "normal", False), + ] + for char in text: + alphabet["normal"][char] = 500 + multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a]) + assert multi_line_break.get_line_of_given_width(5000) == TextLine( + fragments=fragments, + text_width=500, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(100000) is None + + +def test_real_hyphen_acts_differently_from_soft_hyphen(): + words = ["a", "b", "c", "d"] + alphabet = { + "normal": {"\u002d": 500}, + } + words_separated_by_soft_hyphen = "\u00ad".join(words) + words_separated_by_hard_hyphen = "\u002d".join(words) + for char in words_separated_by_soft_hyphen: + alphabet["normal"][char] = 500 + soft_hyphen_line_break = MultiLineBreak( + [Fragment.from_string(words_separated_by_soft_hyphen, "normal", False)], + lambda a, b: alphabet[b][a], + ) + hard_hyphen_line_break = MultiLineBreak( + [Fragment.from_string(words_separated_by_hard_hyphen, "normal", False)], + lambda a, b: alphabet[b][a], + ) + assert soft_hyphen_line_break.get_line_of_given_width( + 2000 + ) != hard_hyphen_line_break.get_line_of_given_width(2000) + assert soft_hyphen_line_break.get_line_of_given_width( + 2000 + ) != hard_hyphen_line_break.get_line_of_given_width(2000) + + +def test_trailing_soft_hyphen(): + """ + fit one word and trailing soft-hyphen into the line with extremely large width. + expected behavior -> + - first call to `get_line_of_given_width` cointains the word. + soft hyphen is not included in the line. + - second call to `get_line_of_given_width` is None because there is no + text left. 
+    """
+    text = "hello\u00ad"
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {"\u002d": 500},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(5000) == TextLine(
+        fragments=[Fragment.from_string("hello", "normal", False)],
+        text_width=2500,  # 5 characters * 500; the soft hyphen adds no width
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(100000) is None
+
+
+def test_trailing_whitespace():
+    """
+    fit one word and trailing whitespace into the line with extremely large width.
+    expected behavior ->
+        - first call to `get_line_of_given_width` contains the word and the space.
+        - second call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    text = "hello "
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(5000) == TextLine(
+        fragments=fragments,
+        text_width=3000,  # "hello" plus the trailing space: 6 * 500
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(100000) is None
+
+
+def test_two_words_one_line():
+    """
+    fit two words into the line with extremely large width.
+    expected behavior ->
+        - first call to `get_line_of_given_width` contains all words.
+        - second call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    text = "hello world"
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(100000) == TextLine(
+        fragments=fragments,
+        text_width=5500,  # 11 characters * 500
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(100000) is None
+
+
+def test_two_words_one_line_justify():
+    """
+    fit two words into the line with extremely large width.
+    expected behavior ->
+        - first call to `get_line_of_given_width` contains all words.
+            this line is expected to be unjustified, because it is the last
+            line.
+        - second call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    text = "hello world"
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(100000) == TextLine(
+        fragments=fragments,
+        text_width=5500,
+        number_of_spaces_between_words=1,
+        justify=False,  # requested, but this is the last line
+    )
+    assert multi_line_break.get_line_of_given_width(100000) is None
+
+
+def test_two_words_two_lines_break_by_space():
+    """
+    fit two words into the line that can fit only one word.
+    expected behavior:
+        - first call to `get_line_of_given_width` contains the first word.
+        - second call to `get_line_of_given_width` contains the second word.
+        - third call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    text = "hello world"
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("hello", "normal", False)],
+        text_width=2500,  # the separating space is not part of either line
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("world", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) is None
+
+
+def test_two_words_two_lines_break_by_space_justify():
+    """
+    fit two words into the line that can fit only one word.
+    expected behavior:
+        - first call to `get_line_of_given_width` contains the first word.
+            Line is expected to be unjustified, because there are no spaces in
+            the line.
+        - second call to `get_line_of_given_width` contains the second word.
+            Line is expected to be unjustified, because it is the last line.
+        - third call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    text = "hello world"
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    # justify is enabled on purpose: both lines must still come back
+    # unjustified (no spaces in the first one, last line for the second)
+    alphabet = {"normal": {char: 500 for char in text}}
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("hello", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("world", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) is None
+
+
+def test_four_words_two_lines_break_by_space():
+    """
+    fit four words into lines that can fit only two words each.
+    expected behavior:
+        - first call to `get_line_of_given_width` contains the first two words.
+        - second call to `get_line_of_given_width` contains the last two words.
+        - third call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    first_line_text = "hello world"
+    second_line_text = "hello world"
+    text = " ".join([first_line_text, second_line_text])
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(6000) == TextLine(
+        fragments=[Fragment.from_string(first_line_text, "normal", False)],
+        text_width=5500,  # 11 characters * 500; one more glyph would exceed 6000
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(6000) == TextLine(
+        fragments=[Fragment.from_string(second_line_text, "normal", False)],
+        text_width=5500,
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) is None
+
+
+def test_four_words_two_lines_break_by_space_justify():
+    """
+    fit four words into lines that can fit only two words each.
+    expected behavior:
+        - first call to `get_line_of_given_width` contains the first two words.
+            Line is expected to be justified.
+        - second call to `get_line_of_given_width` contains the last two words.
+            Line is expected to be unjustified, because it is the last line.
+        - third call to `get_line_of_given_width` is None because there is no
+            text left.
+    """
+    first_line_text = "hello world"
+    second_line_text = "hello world"
+    text = " ".join((first_line_text, second_line_text))
+    fragments = [
+        Fragment.from_string(text, "normal", False),
+    ]
+    alphabet = {
+        "normal": {},
+    }
+    for char in text:
+        alphabet["normal"][char] = 500
+
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(6000) == TextLine(
+        fragments=[Fragment.from_string(first_line_text, "normal", False)],
+        text_width=5500,
+        number_of_spaces_between_words=1,
+        justify=True,  # has a space and is not the last line
+    )
+    assert multi_line_break.get_line_of_given_width(6000) == TextLine(
+        fragments=[Fragment.from_string(second_line_text, "normal", False)],
+        text_width=5500,
+        number_of_spaces_between_words=1,
+        justify=False,  # last line is never justified
+    )
+    assert multi_line_break.get_line_of_given_width(2500) is None
+
+
+def test_break_fragment_into_two_lines():
+    """
+    There are multiple fragments with different styles.
+    This test breaks one fragment between two lines.
+    """
+    alphabet = {
+        "normal": {},
+        "bold": {},
+    }
+    first_line_text = "one "
+    second_line_text = "two three"
+    third_line_text = " four"
+    text = "".join((first_line_text, second_line_text, third_line_text))
+    for char in text:
+        alphabet["normal"][char] = 500
+        alphabet["bold"][char] = 1000  # bold glyphs are twice as wide
+
+    fragments = [
+        Fragment.from_string(first_line_text, "normal", False),
+        Fragment.from_string(second_line_text, "bold", False),
+        Fragment.from_string(third_line_text, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(5000) == TextLine(
+        fragments=[
+            Fragment.from_string(first_line_text, "normal", False),
+            Fragment.from_string("two", "bold", False),
+        ],
+        text_width=5000,  # "one " 4 * 500 + bold "two" 3 * 1000
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(8000) == TextLine(
+        fragments=[
+            Fragment.from_string("three", "bold", False),
+            Fragment.from_string(third_line_text, "normal", False),
+        ],
+        text_width=7500,  # bold "three" 5 * 1000 + " four" 5 * 500
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(6000) is None
+
+
+def test_break_fragment_into_two_lines_justify():
+    """
+    There are multiple fragments with different styles.
+    This test breaks one fragment between two lines.
+    """
+    alphabet = {
+        "normal": {},
+        "bold": {},
+    }
+    first_line_text = "one "
+    second_line_text = "two three"
+    third_line_text = " four"
+    text = "".join((first_line_text, second_line_text, third_line_text))
+    for char in text:
+        alphabet["normal"][char] = 500
+        alphabet["bold"][char] = 1000
+
+    fragments = [
+        Fragment.from_string(first_line_text, "normal", False),
+        Fragment.from_string(second_line_text, "bold", False),
+        Fragment.from_string(third_line_text, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(5000) == TextLine(
+        fragments=[
+            Fragment.from_string(first_line_text, "normal", False),
+            Fragment.from_string("two", "bold", False),
+        ],
+        text_width=5000,
+        number_of_spaces_between_words=1,
+        justify=True,
+    )
+    assert multi_line_break.get_line_of_given_width(8000) == TextLine(
+        fragments=[
+            Fragment.from_string("three", "bold", False),
+            Fragment.from_string(third_line_text, "normal", False),
+        ],
+        text_width=7500,
+        number_of_spaces_between_words=1,
+        justify=False,  # last line is never justified
+    )
+    assert multi_line_break.get_line_of_given_width(6000) is None
+
+
+def test_soft_hyphen_break():
+    """
+    all characters are separated by soft-hyphen
+    expected behavior - there is a hard hyphen at the end of every line,
+    except for the last one
+    """
+    alphabet = {
+        "normal": {"\u002d": 500},
+    }
+    long_string = "\u00ad".join("abcdefghijklmnop")
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("abcd\u002d", "normal", False)],
+        text_width=2500,  # 4 letters plus the inserted hyphen: 5 * 500
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("efgh\u002d", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2200) == TextLine(
+        fragments=[Fragment.from_string("ijk\u002d", "normal", False)],
+        text_width=2000,  # a fifth 500-wide glyph would exceed 2200
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) == TextLine(
+        fragments=[Fragment.from_string("l\u002d", "normal", False)],
+        text_width=1000,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) == TextLine(
+        fragments=[Fragment.from_string("m\u002d", "normal", False)],
+        text_width=1000,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) == TextLine(
+        fragments=[Fragment.from_string("n\u002d", "normal", False)],
+        text_width=1000,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) == TextLine(
+        fragments=[Fragment.from_string("op", "normal", False)],
+        text_width=1000,  # last line: two letters, no hyphen added
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_soft_hyphen_break_justify():
+    """
+    two-word groups are joined by soft-hyphens and justification is enabled
+    expected behavior - every line but the last ends with a hard hyphen and
+    is justified; the last line is unjustified
+    """
+    alphabet = {
+        "normal": {"\u002d": 500},
+    }
+    words = ["ab cd", "ef gh", "kl mn"]
+    long_string = "\u00ad".join(words)
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(3000) == TextLine(
+        fragments=[Fragment.from_string("ab cd\u002d", "normal", False)],
+        text_width=3000,
+        number_of_spaces_between_words=1,
+        justify=True,
+    )
+    assert multi_line_break.get_line_of_given_width(3000) == TextLine(
+        fragments=[Fragment.from_string("ef gh\u002d", "normal", False)],
+        text_width=3000,
+        number_of_spaces_between_words=1,
+        justify=True,
+    )
+    assert multi_line_break.get_line_of_given_width(3000) == TextLine(
+        fragments=[Fragment.from_string("kl mn", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=1,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_explicit_break():
+    """
+    There is an explicit break character after every character
+    Expected behavior:
+        `get_line_of_given_width` returns single character on every call
+    """
+    alphabet = {
+        "normal": {},
+    }
+    long_string = "\n".join("abcd")
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("a", "normal", False)],
+        text_width=500,  # the newline itself is not part of the line
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("b", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("c", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("d", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_explicit_break_justify():
+    """
+    There is an explicit break character after every character
+    Expected behavior:
+        `get_line_of_given_width` returns single character on every call,
+        returned lines are expected to be unjustified
+    """
+    alphabet = {
+        "normal": {},
+    }
+    long_string = "\n".join("abcd")
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("a", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("b", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("c", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("d", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_single_world_doesnt_fit_into_width():
+    """
+    There is a single word that doesn't fit into requested line
+    Expected behavior:
+        `get_line_of_given_width` returns as many characters as can fit into
+        the user-provided width.
+    """
+    alphabet = {
+        "normal": {},
+    }
+    long_string = "abcdefghijklmnop"
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(fragments, lambda a, b: alphabet[b][a])
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("abcde", "normal", False)],
+        text_width=2500,  # exactly 5 glyphs of 500 fit into 2500
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("fghij", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("klmno", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("p", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_single_world_doesnt_fit_into_width_justify():
+    """
+    There is a single word that doesn't fit into the requested line width.
+    Expected behavior:
+        `get_line_of_given_width` returns as many characters as can fit into
+        the user-provided width. Returned lines are expected to be unjustified.
+    """
+    alphabet = {
+        "normal": {},
+    }
+    long_string = "abcdefghijklmnop"
+    for char in long_string:
+        alphabet["normal"][char] = 500
+
+    fragments = [
+        Fragment.from_string(long_string, "normal", False),
+    ]
+    multi_line_break = MultiLineBreak(
+        fragments, lambda a, b: alphabet[b][a], justify=True
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("abcde", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,  # requested, but the line contains no spaces
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("fghij", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("klmno", "normal", False)],
+        text_width=2500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(2500) == TextLine(
+        fragments=[Fragment.from_string("p", "normal", False)],
+        text_width=500,
+        number_of_spaces_between_words=0,
+        justify=False,
+    )
+    assert multi_line_break.get_line_of_given_width(1000) is None
+
+
+def test_last_line_no_justify():
+    """
+    Make sure that the last line is not justified.
+ """ + alphabet = { + "normal": {}, + } + long_string = "a" + for char in long_string: + alphabet["normal"][char] = 500 + + fragments = [ + Fragment.from_string(long_string, "normal", False), + ] + multi_line_break = MultiLineBreak( + fragments, lambda a, b: alphabet[b][a], justify=True + ) + assert multi_line_break.get_line_of_given_width(2500) == TextLine( + fragments=fragments, + text_width=500, + number_of_spaces_between_words=0, + justify=False, + ) + assert multi_line_break.get_line_of_given_width(1000) is None diff --git a/test/test_markdown_parse.py b/test/test_markdown_parse.py index 2d72d78c3..23fb210e5 100644 --- a/test/test_markdown_parse.py +++ b/test/test_markdown_parse.py @@ -1,41 +1,46 @@ # pylint: disable=protected-access from fpdf import FPDF +from fpdf.line_break import Fragment def test_markdown_parse_simple_ok(): assert tuple( FPDF()._markdown_parse("**bold**, __italics__ and --underlined--") ) == ( - ("bold", "B", False), - (", ", "", False), - ("italics", "I", False), - (" and ", "", False), - ("underlined", "", True), + Fragment.from_string("bold", "B", False), + Fragment.from_string(", ", "", False), + Fragment.from_string("italics", "I", False), + Fragment.from_string(" and ", "", False), + Fragment.from_string("underlined", "", True), ) def test_markdown_parse_overlapping(): assert tuple(FPDF()._markdown_parse("**bold __italics__**")) == ( - ("bold ", "B", False), - ("italics", "BI", False), + Fragment.from_string("bold ", "B", False), + Fragment.from_string("italics", "BI", False), ) def test_markdown_parse_crossing_markers(): assert tuple(FPDF()._markdown_parse("**bold __and** italics__")) == ( - ("bold ", "B", False), - ("and", "BI", False), - (" italics", "I", False), + Fragment.from_string("bold ", "B", False), + Fragment.from_string("and", "BI", False), + Fragment.from_string(" italics", "I", False), ) def test_markdown_parse_unterminated(): assert tuple(FPDF()._markdown_parse("**bold __italics__")) == ( - ("bold ", "B", False), - 
("italics", "BI", False), + Fragment.from_string("bold ", "B", False), + Fragment.from_string("italics", "BI", False), ) def test_markdown_parse_line_of_markers(): - assert tuple(FPDF()._markdown_parse("*** woops")) == (("*** woops", "", False),) - assert tuple(FPDF()._markdown_parse("----------")) == (("----------", "", False),) + assert tuple(FPDF()._markdown_parse("*** woops")) == ( + Fragment.from_string("*** woops", "", False), + ) + assert tuple(FPDF()._markdown_parse("----------")) == ( + Fragment.from_string("----------", "", False), + )