Skip to content

Commit

Permalink
text: replace BoldSpan/ItalicSpan with text properties
Browse files Browse the repository at this point in the history
This representation is simpler to process and more flexible,
especially with overlapping spans of bold/italic.
  • Loading branch information
ricklupton committed Oct 5, 2023
1 parent 8760868 commit e0ea16b
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 101 deletions.
121 changes: 46 additions & 75 deletions src/rmscene/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,35 +68,31 @@ def expand_text_items(

@dataclass
class CrdtStr:
    """A run of characters that share one set of text properties.

    `s` holds the characters and `i` the `CrdtId` recorded for each of
    them. `properties` describes the run as a whole; text whose
    formatting varies is represented as a list of `CrdtStr` objects,
    one per uniformly-formatted span.
    """

    s: str = ""
    i: list[CrdtId] = field(default_factory=list)
    properties: dict = field(default_factory=dict)

    def __str__(self):
        # The string form is only the characters, without ids or properties.
        return self.s


@dataclass
class TextSpan:
"""Base class for text spans with formatting."""

contents: list[tp.Union["TextSpan", CrdtStr]]


class BoldSpan(TextSpan):
pass


class ItalicSpan(TextSpan):
pass


@dataclass
class Paragraph:
"""Paragraph of text."""

contents: list[TextSpan]
contents: list[CrdtStr]
start_id: CrdtId
style: LwwValue[si.ParagraphStyle]
style: LwwValue[si.ParagraphStyle] = field(
default_factory=lambda: LwwValue(CrdtId(0, 0), si.ParagraphStyle.PLAIN)
)

def __str__(self):
return "".join(str(s) for s in self.contents)
Expand All @@ -120,78 +116,53 @@ def from_scene_item(cls, text: si.Text):
# Expand from strings to characters
char_items = CrdtSequence(expand_text_items(text.items.sequence_items()))
keys = list(char_items)
last_linebreak = si.END_MARKER

span_start_codes = {
1: BoldSpan,
3: ItalicSpan,
}
span_end_codes = {
2: BoldSpan,
4: ItalicSpan,
}
properties = {"font-weight": "normal", "font-style": "normal"}

def handle_formatting_code(code):
    """Update the running text properties for one inline formatting code.

    Codes 1 and 2 set "font-weight" to "bold" and back to "normal";
    codes 3 and 4 set "font-style" to "italic" and back to "normal".
    Any other code leaves the properties unchanged and is logged as a
    warning.

    Returns the shared `properties` dict, which is modified in place.
    """
    # Keep this as ONE if/elif chain. Previously the italic check began a
    # second `if`, so its `else` branch also ran for the valid bold codes
    # 1 and 2 and reported them as unknown.
    if code == 1:
        properties["font-weight"] = "bold"
    elif code == 2:
        properties["font-weight"] = "normal"
    elif code == 3:
        properties["font-style"] = "italic"
    elif code == 4:
        properties["font-style"] = "normal"
    else:
        _logger.warning("Unknown formatting code in text: %d", code)
    return properties

def parse_paragraph_contents():
nonlocal last_linebreak
stack = [(None, [])]
k = None
done = False
if keys and char_items[keys[0]] == "\n":
start_id = keys.pop(0)
else:
start_id = si.END_MARKER
contents = []
while keys:
# If we've seen a newline character, only interested in
# span-closing format codes.
if done and char_items[keys[0]] not in (2, 4):
break

k = keys.pop(0)
char = char_items[k]
char = char_items[keys[0]]
if isinstance(char, int):
if char in span_start_codes:
span_type = span_start_codes[char]
stack.append((span_type, []))
elif char in span_end_codes:
span_type, nested = stack.pop()
if span_type is not span_end_codes[char]:
_logger.error(
"Unexpected end of span at %s: got %s, expected %s",
k,
span_end_codes[char],
span_type,
)
if span_type is not None:
stack[-1][1].append(span_type(nested))
else:
_logger.warning("Unknown format code %d at %s!", char, k)
handle_formatting_code(char)
elif char == "\n":
# End of paragraph
done = True
last_linebreak = k
break
else:
assert len(char) <= 1
_, contents = stack[-1]
if not contents or not isinstance(contents[-1], CrdtStr):
contents += [CrdtStr()]
# Start a new string if text properties have changed
if not contents or contents[-1].properties != properties:
contents += [CrdtStr(properties=properties.copy())]
contents[-1].s += char
contents[-1].i += [k]
contents[-1].i += [keys[0]]
keys.pop(0)

if len(stack) > 1:
_logger.error("Unbalanced stack! %s", stack)

_, contents = stack[-1]
return contents
return start_id, contents

paragraphs = []
while keys:
style = text.styles.get(
last_linebreak, LwwValue(CrdtId(0, 0), si.ParagraphStyle.PLAIN)
)
contents = parse_paragraph_contents()
p = Paragraph(contents, last_linebreak, style)
start_id, contents = parse_paragraph_contents()
if start_id in text.styles:
p = Paragraph(contents, start_id, text.styles[start_id])
else:
p = Paragraph(contents, start_id)
paragraphs += [p]

doc = cls(paragraphs)
return doc

# if k in char_formats:
# current_format = char_formats[k]
# if char != "\n":
# _logger.warning("format does not apply to whole line")
183 changes: 181 additions & 2 deletions tests/test_text.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
from rmscene.text import expand_text_item
from rmscene import CrdtId, CrdtSequenceItem
import pytest
from rmscene.text import (
expand_text_item,
expand_text_items,
TextDocument,
CrdtStr,
Paragraph,
)
from rmscene import scene_items as si
from rmscene import CrdtId, CrdtSequenceItem, CrdtSequence


def cid(k):
Expand Down Expand Up @@ -44,3 +52,174 @@ def test_expand_text_empty():
make_item(21, 20, 22, 1, ""),
make_item(22, 21, 0, 1, ""),
]


# Integer formatting codes used as item values in the tests below to
# switch bold / italic on and off between characters.
START_BOLD = 1
END_BOLD = 2
START_ITALIC = 3
END_ITALIC = 4


def doc_from_items(items):
    """Build a `TextDocument` from a list of CRDT sequence items.

    The items are wrapped in a `si.Text` scene item with no paragraph
    styles and a fixed position and width, then parsed with
    `TextDocument.from_scene_item`.
    """
    return TextDocument.from_scene_item(
        si.Text(
            items=CrdtSequence(items),
            styles={},
            pos_x=-468.0,
            pos_y=234.0,
            width=936.0,
        )
    )


def test_inline_formatting_italic_over_paragraphs():
    """Italic that starts in one paragraph continues into the next."""
    plain = {"font-weight": "normal", "font-style": "normal"}
    italic = {"font-weight": "normal", "font-style": "italic"}

    items = [
        make_item(20, 0, 0, 0, "A"),
        make_item(21, 20, 0, 0, "B\nC"),
        make_item(24, 23, 0, 0, "D"),
        # Start italic between A and B
        make_item(30, 20, 21, 0, START_ITALIC),
        # End italic between C and D
        make_item(31, 23, 24, 0, END_ITALIC),
    ]
    doc = doc_from_items(items)

    first_paragraph = Paragraph(
        contents=[
            CrdtStr("A", [CrdtId(1, 20)], plain),
            CrdtStr("B", [CrdtId(1, 21)], italic),
        ],
        start_id=CrdtId(0, 0),
    )
    second_paragraph = Paragraph(
        contents=[
            CrdtStr("C", [CrdtId(1, 23)], italic),
            CrdtStr("D", [CrdtId(1, 24)], plain),
        ],
        start_id=CrdtId(1, 22),
    )
    assert doc.contents == [first_paragraph, second_paragraph]


def test_inline_formatting_bold_over_paragraphs():
    """Bold that starts in one paragraph continues into the next.

    This block used to be an exact copy of the italic test under the same
    name, so it shadowed that test and added no coverage. It now checks
    the bold codes over the same item layout.
    """
    doc = doc_from_items(
        [
            make_item(20, 0, 0, 0, "A"),
            make_item(21, 20, 0, 0, "B\nC"),
            make_item(24, 23, 0, 0, "D"),
            # Start bold between A and B
            make_item(30, 20, 21, 0, START_BOLD),
            # End bold between C and D
            make_item(31, 23, 24, 0, END_BOLD),
        ]
    )

    assert doc.contents == [
        Paragraph(
            contents=[
                CrdtStr(
                    "A",
                    [CrdtId(1, 20)],
                    {"font-weight": "normal", "font-style": "normal"},
                ),
                CrdtStr(
                    "B",
                    [CrdtId(1, 21)],
                    {"font-weight": "bold", "font-style": "normal"},
                ),
            ],
            start_id=CrdtId(0, 0),
        ),
        Paragraph(
            contents=[
                CrdtStr(
                    "C",
                    [CrdtId(1, 23)],
                    {"font-weight": "bold", "font-style": "normal"},
                ),
                CrdtStr(
                    "D",
                    [CrdtId(1, 24)],
                    {"font-weight": "normal", "font-style": "normal"},
                ),
            ],
            start_id=CrdtId(1, 22),
        ),
    ]


def test_inline_formatting_bold_italic_interleaved_over_paragraphs():
    """Overlapping bold and italic spans are tracked independently."""

    def run(char, key, weight, style):
        # One single-character CrdtStr with the given text properties.
        return CrdtStr(
            char,
            [CrdtId(1, key)],
            {"font-weight": weight, "font-style": style},
        )

    items = [
        make_item(20, 0, 0, 0, "ABC\nDEF"),
        # Start italic between A and B
        make_item(30, 20, 21, 0, START_ITALIC),
        # Start bold between B and C
        make_item(31, 21, 22, 0, START_BOLD),
        # End italic between D and E
        make_item(32, 24, 25, 0, END_ITALIC),
        # End bold between E and F
        make_item(33, 25, 26, 0, END_BOLD),
    ]
    doc = doc_from_items(items)

    expected = [
        Paragraph(
            contents=[
                run("A", 20, "normal", "normal"),
                run("B", 21, "normal", "italic"),
                run("C", 22, "bold", "italic"),
            ],
            start_id=CrdtId(0, 0),
        ),
        Paragraph(
            contents=[
                run("D", 24, "bold", "italic"),
                run("E", 25, "bold", "normal"),
                run("F", 26, "normal", "normal"),
            ],
            start_id=CrdtId(1, 23),
        ),
    ]
    assert doc.contents == expected
Loading

0 comments on commit e0ea16b

Please sign in to comment.