From 370d80b38898753399c472fc59537566da07e836 Mon Sep 17 00:00:00 2001 From: apple1417 Date: Fri, 26 Jan 2024 23:50:30 +1300 Subject: [PATCH] improve html plain text conversions --- src/console_mod_menu/draw.py | 12 +++ src/console_mod_menu/option_formatting.py | 6 +- src/console_mod_menu/screens/mod.py | 4 +- src/mods_base/__init__.py | 7 +- src/mods_base/html_to_plain_text.py | 103 ++++++++++++++++++++++ src/mods_base/mod_list.py | 31 ++----- 6 files changed, 128 insertions(+), 35 deletions(-) create mode 100644 src/mods_base/html_to_plain_text.py diff --git a/src/console_mod_menu/draw.py b/src/console_mod_menu/draw.py index 2752fa5..86b73bb 100644 --- a/src/console_mod_menu/draw.py +++ b/src/console_mod_menu/draw.py @@ -23,3 +23,15 @@ def draw(msg: str, indent: int = 0) -> None: for line in wrapper.fill(html_to_plain_text(msg)).splitlines(): print(prefix, line) + + +def draw_description(description: str, indent: int = 0) -> None: + """ + Draws a message coming from a mod/option description - honoring existing newlines. + + Args: + description: The description to write. + indent: How much to indent the message. 
+ """ + for line in html_to_plain_text(description).splitlines(): + draw(line, indent) diff --git a/src/console_mod_menu/option_formatting.py b/src/console_mod_menu/option_formatting.py index 23dfbd1..0426c9a 100644 --- a/src/console_mod_menu/option_formatting.py +++ b/src/console_mod_menu/option_formatting.py @@ -2,7 +2,7 @@ from mods_base import JSON, BaseOption, BoolOption, KeybindOption, ValueOption -from .draw import draw +from .draw import draw, draw_description from .screens import draw_stack_header _J = TypeVar("_J", bound=JSON) @@ -64,8 +64,6 @@ def draw_option_header(option: BaseOption) -> None: if len(option.description) > 0: draw("=" * 32) - # Respect newlines - passing everything at once would let them get wrapped arbitrarily - for line in option.description.splitlines(): - draw(line) + draw_description(option.description) draw("") diff --git a/src/console_mod_menu/screens/mod.py b/src/console_mod_menu/screens/mod.py index e791f51..5493888 100644 --- a/src/console_mod_menu/screens/mod.py +++ b/src/console_mod_menu/screens/mod.py @@ -17,7 +17,7 @@ ) from unrealsdk import logging -from console_mod_menu.draw import draw +from console_mod_menu.draw import draw, draw_description from console_mod_menu.option_formatting import draw_option_header, get_option_value_str from . 
import ( @@ -140,7 +140,7 @@ def draw(self) -> None: # noqa: D102 draw("") if self.mod.description: - draw(self.mod.description) + draw_description(self.mod.description) draw("") if not self.mod.enabling_locked: diff --git a/src/mods_base/__init__.py b/src/mods_base/__init__.py index ba9c7ce..e9410c2 100644 --- a/src/mods_base/__init__.py +++ b/src/mods_base/__init__.py @@ -36,14 +36,14 @@ command, remove_next_console_line_capture, ) -from .hook import hook +from .hook import HookProtocol, hook +from .html_to_plain_text import html_to_plain_text from .keybinds import EInputEvent, KeybindType, keybind from .mod import Game, Library, Mod, ModType from .mod_factory import build_mod from .mod_list import ( deregister_mod, get_ordered_mod_list, - html_to_plain_text, register_mod, ) from .options import ( @@ -83,6 +83,7 @@ "GroupedOption", "HiddenOption", "hook", + "HookProtocol", "html_to_plain_text", "JSON", "keybind", @@ -91,9 +92,9 @@ "Library", "Mod", "MODS_DIR", - "open_in_mod_dir", "ModType", "NestedOption", + "open_in_mod_dir", "raw_keybinds", "register_mod", "remove_next_console_line_capture", diff --git a/src/mods_base/html_to_plain_text.py b/src/mods_base/html_to_plain_text.py new file mode 100644 index 0000000..65a520f --- /dev/null +++ b/src/mods_base/html_to_plain_text.py @@ -0,0 +1,103 @@ +# ruff: noqa: D102 + +from dataclasses import dataclass +from functools import cache +from html.parser import HTMLParser + + +@dataclass +class OrderedList: + num: int = 1 + + +@dataclass +class UnorderedList: + pass + + +class PlainTextHTMLConverter(HTMLParser): + plain_text: str + list_item_stack: list[OrderedList | UnorderedList] + + def __init__(self) -> None: + super().__init__() + + self.plain_text = "" + self.list_item_stack = [] + + def handle_data(self, data: str) -> None: + self.plain_text += data + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: + match tag.lower(): + case "br": + self.plain_text += "\n" + + case "ol": + 
self.plain_text += "\n" + self.list_item_stack.append(OrderedList()) + + case "ul": + self.plain_text += "\n" + self.list_item_stack.append(UnorderedList()) + + case "li": + if len(self.list_item_stack) >= 1: + list_state = self.list_item_stack[-1] + match list_state: + case OrderedList(): + self.plain_text += f"{list_state.num}. " + list_state.num += 1 + case UnorderedList(): + self.plain_text += "- " + + case "img": + for name, val in attrs: + if name.lower() == "alt" and val is not None: + self.plain_text += val + break + + case _: + pass + + def handle_endtag(self, tag: str) -> None: + match tag.lower(): + case "ol": + if isinstance(self.list_item_stack[-1], OrderedList): + self.list_item_stack.pop() + + case "ul": + if isinstance(self.list_item_stack[-1], UnorderedList): + self.list_item_stack.pop() + + case "li": + self.plain_text += "\n" + + case _: + pass + + +@cache +def html_to_plain_text(html: str) -> str: + """ + Extracts plain text from HTML-containing text. This is *NOT* input sanitisation. + + Removes most tags in place, and decodes entities - `&amp;` becomes `&`. + + A few tags are substituted for plain text equivalents: + - `<br>` becomes a newline + - `<ol><li>` becomes `1. ` (incrementing with each list item) + - `<ul><li>` becomes `- ` + - `<img alt='xyz'>` becomes its alt text + + Intended for use when accessing a mod name/description/option/etc., which may contain HTML tags, + but in a situation where such tags would be inappropriate. + + Args: + html: The HTML-containing text. + Returns: + The extracted plain text. + """ + parser = PlainTextHTMLConverter() + parser.feed(html) + return parser.plain_text diff --git a/src/mods_base/mod_list.py b/src/mods_base/mod_list.py index 37a8cf8..811a53d 100644 --- a/src/mods_base/mod_list.py +++ b/src/mods_base/mod_list.py @@ -1,7 +1,6 @@ import os from dataclasses import dataclass, field from functools import cmp_to_key -from html.parser import HTMLParser from pathlib import Path import pyunrealsdk @@ -10,6 +9,7 @@ from . import MODS_DIR, __version__ from .command import AbstractCommand from .hook import HookProtocol +from .html_to_plain_text import html_to_plain_text from .keybinds import KeybindType from .mod import Game, Library, Mod, ModType from .options import BaseOption, ButtonOption @@ -56,9 +56,11 @@ def description(self) -> str: # Once already sorted, re-sorting should be relatively quick self.components.sort(key=lambda c: c.name.lower()) - description = "Components:\n" + description = "Components:" + description += "<ul>" for comp in self.components: - description += f"- {comp.name}: {comp.version}\n" + description += f"<li>{comp.name}: {comp.version}</li>" + description += "</ul>" return description @@ -118,29 +120,6 @@ def deregister_mod(mod: Mod) -> None: mod_list.remove(mod) -def html_to_plain_text(html: str) -> str: - """ - Extracts plain text from HTML-containing text. This is *NOT* input sanitisation. - - Removes tags, and decodes entities - `&amp;` becomes `&`. - - Intended for use when accessing a mod name/description/option/etc., which may contain HTML tags, - but in a situation where such tags would be inappropriate. - - Args: - html: The HTML-containing text. - Returns: - The extracted plain text. - """ - extracted_data: list[str] = [] - - parser = HTMLParser() - parser.handle_data = lambda data: extracted_data.append(data) - parser.feed(html) - - return "".join(extracted_data) - - def get_ordered_mod_list() -> list[Mod]: """ Gets the list of mods, in display order.