Skip to content

Commit

Permalink
Merge pull request #13 from apple1417/master
Browse files Browse the repository at this point in the history
improve html plain text conversions
  • Loading branch information
apple1417 authored Jan 29, 2024
2 parents 0b95cbe + 370d80b commit 6b0d0ae
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 35 deletions.
12 changes: 12 additions & 0 deletions src/console_mod_menu/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,15 @@ def draw(msg: str, indent: int = 0) -> None:

for line in wrapper.fill(html_to_plain_text(msg)).splitlines():
print(prefix, line)


def draw_description(description: str, indent: int = 0) -> None:
    """
    Draws a mod/option description, keeping the line breaks it already contains.

    The description is converted from HTML to plain text first, then each resulting line is
    drawn individually, so that existing newlines are not re-wrapped together.

    Args:
        description: The description to write.
        indent: How much to indent the message.
    """
    plain_description = html_to_plain_text(description)
    for row in plain_description.splitlines():
        draw(row, indent)
6 changes: 2 additions & 4 deletions src/console_mod_menu/option_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from mods_base import JSON, BaseOption, BoolOption, KeybindOption, ValueOption

from .draw import draw
from .draw import draw, draw_description
from .screens import draw_stack_header

_J = TypeVar("_J", bound=JSON)
Expand Down Expand Up @@ -64,8 +64,6 @@ def draw_option_header(option: BaseOption) -> None:

if len(option.description) > 0:
draw("=" * 32)
# Respect newlines - passing everything at once would let them get wrapped arbitrarily
for line in option.description.splitlines():
draw(line)
draw_description(option.description)

draw("")
4 changes: 2 additions & 2 deletions src/console_mod_menu/screens/mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from unrealsdk import logging

from console_mod_menu.draw import draw
from console_mod_menu.draw import draw, draw_description
from console_mod_menu.option_formatting import draw_option_header, get_option_value_str

from . import (
Expand Down Expand Up @@ -140,7 +140,7 @@ def draw(self) -> None: # noqa: D102
draw("")

if self.mod.description:
draw(self.mod.description)
draw_description(self.mod.description)
draw("")

if not self.mod.enabling_locked:
Expand Down
7 changes: 4 additions & 3 deletions src/mods_base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
command,
remove_next_console_line_capture,
)
from .hook import hook
from .hook import HookProtocol, hook
from .html_to_plain_text import html_to_plain_text
from .keybinds import EInputEvent, KeybindType, keybind
from .mod import Game, Library, Mod, ModType
from .mod_factory import build_mod
from .mod_list import (
deregister_mod,
get_ordered_mod_list,
html_to_plain_text,
register_mod,
)
from .options import (
Expand Down Expand Up @@ -83,6 +83,7 @@
"GroupedOption",
"HiddenOption",
"hook",
"HookProtocol",
"html_to_plain_text",
"JSON",
"keybind",
Expand All @@ -91,9 +92,9 @@
"Library",
"Mod",
"MODS_DIR",
"open_in_mod_dir",
"ModType",
"NestedOption",
"open_in_mod_dir",
"raw_keybinds",
"register_mod",
"remove_next_console_line_capture",
Expand Down
103 changes: 103 additions & 0 deletions src/mods_base/html_to_plain_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# ruff: noqa: D102

from dataclasses import dataclass
from functools import cache
from html.parser import HTMLParser


@dataclass
class OrderedList:
num: int = 1


@dataclass
class UnorderedList:
pass


class PlainTextHTMLConverter(HTMLParser):
plain_text: str
list_item_stack: list[OrderedList | UnorderedList]

def __init__(self) -> None:
super().__init__()

self.plain_text = ""
self.list_item_stack = []

def handle_data(self, data: str) -> None:
self.plain_text += data

def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
match tag.lower():
case "br":
self.plain_text += "\n"

case "ol":
self.plain_text += "\n"
self.list_item_stack.append(OrderedList())

case "ul":
self.plain_text += "\n"
self.list_item_stack.append(UnorderedList())

case "li":
if len(self.list_item_stack) >= 1:
list_state = self.list_item_stack[-1]
match list_state:
case OrderedList():
self.plain_text += f"{list_state.num}. "
list_state.num += 1
case UnorderedList():
self.plain_text += "- "

case "img":
for name, val in attrs:
if name.lower() == "alt" and val is not None:
self.plain_text += val
break

case _:
pass

def handle_endtag(self, tag: str) -> None:
match tag.lower():
case "ol":
if isinstance(self.list_item_stack[-1], OrderedList):
self.list_item_stack.pop()

case "ul":
if isinstance(self.list_item_stack[-1], UnorderedList):
self.list_item_stack.pop()

case "li":
self.plain_text += "\n"

case _:
pass


@cache
def html_to_plain_text(html: str) -> str:
"""
Extracts plain text from HTML-containing text. This is *NOT* input sanitisation.
Removes most tags in place, and decodes entities - `<b>&amp;</b>` becomes `&`.
A few tags are substituted for plain text equivalents:
- `<br>` becomes a newline
- `<ol><li>` becomes `1. ` (incrementing with each list item)
- `<ul><li>` becomes `- `
- `<img alt='xyz'>` becomes it's alt text
Intended for use when accessing a mod name/description/option/etc., which may contain HTML tags,
but in a situation where such tags would be inappropriate.
Args:
html: The HTML-containing text.
Returns:
The extracted plain text.
"""
parser = PlainTextHTMLConverter()
parser.feed(html)
return parser.plain_text
31 changes: 5 additions & 26 deletions src/mods_base/mod_list.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
from dataclasses import dataclass, field
from functools import cmp_to_key
from html.parser import HTMLParser
from pathlib import Path

import pyunrealsdk
Expand All @@ -10,6 +9,7 @@
from . import MODS_DIR, __version__
from .command import AbstractCommand
from .hook import HookProtocol
from .html_to_plain_text import html_to_plain_text
from .keybinds import KeybindType
from .mod import Game, Library, Mod, ModType
from .options import BaseOption, ButtonOption
Expand Down Expand Up @@ -56,9 +56,11 @@ def description(self) -> str:
# Once already sorted, re-sorting should be relatively quick
self.components.sort(key=lambda c: c.name.lower())

description = "Components:\n"
description = "Components:"
description += "<ul>"
for comp in self.components:
description += f"- {comp.name}: {comp.version}\n"
description += f"<li>{comp.name}: {comp.version}</li>"
description += "</ul>"

return description

Expand Down Expand Up @@ -118,29 +120,6 @@ def deregister_mod(mod: Mod) -> None:
mod_list.remove(mod)


def html_to_plain_text(html: str) -> str:
    """
    Extracts plain text from HTML-containing text. This is *NOT* input sanitisation.

    Removes tags, and decodes entities - `<b>&amp;</b>` becomes `&`.

    Intended for use when accessing a mod name/description/option/etc., which may contain HTML tags,
    but in a situation where such tags would be inappropriate.

    Args:
        html: The HTML-containing text.
    Returns:
        The extracted plain text.
    """
    extracted_data: list[str] = []

    parser = HTMLParser()
    # Collect every run of text content; tags are simply never handled, so get dropped
    parser.handle_data = extracted_data.append
    parser.feed(html)
    # The parser holds back trailing text which might be an entity cut in half (e.g. the `&T`
    # in `AT&T`), waiting for more input. Closing it forces that text to be flushed.
    parser.close()

    return "".join(extracted_data)


def get_ordered_mod_list() -> list[Mod]:
"""
Gets the list of mods, in display order.
Expand Down

0 comments on commit 6b0d0ae

Please sign in to comment.