♻️ Refactor backslash escape logic (#276)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
chrisjsewell and pre-commit-ci[bot] authored Jun 2, 2023
1 parent 500e69e commit ba96f34
Showing 13 changed files with 173 additions and 67 deletions.
11 changes: 10 additions & 1 deletion markdown_it/parser_core.py
@@ -7,7 +7,15 @@
from __future__ import annotations

from .ruler import RuleFunc, Ruler
from .rules_core import block, inline, linkify, normalize, replace, smartquotes
from .rules_core import (
block,
inline,
linkify,
normalize,
replace,
smartquotes,
text_join,
)
from .rules_core.state_core import StateCore

_rules: list[tuple[str, RuleFunc]] = [
@@ -17,6 +25,7 @@
("linkify", linkify),
("replacements", replace),
("smartquotes", smartquotes),
("text_join", text_join),
]


9 changes: 8 additions & 1 deletion markdown_it/parser_inline.py
@@ -28,11 +28,18 @@
("entity", rules_inline.entity),
]

# Note `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
_rules2: list[tuple[str, RuleFunc]] = [
("balance_pairs", rules_inline.link_pairs),
("strikethrough", rules_inline.strikethrough.postProcess),
("emphasis", rules_inline.emphasis.postProcess),
("text_collapse", rules_inline.text_collapse),
# rules for pairs separate '**' into its own text tokens, which may be left unused,
# rule below merges unused segments back with the rest of the text
("fragments_join", rules_inline.fragments_join),
]


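Because the post-processing rule was renamed from `text_collapse` to `fragments_join`, any plugin that anchors its own `ruler2` rule on the old name needs updating. A minimal sketch against the existing `Ruler` API (the rule name `my_pairs_rule` and its no-op callback are purely illustrative):

```python
from markdown_it import MarkdownIt
from markdown_it.rules_inline import StateInline


def my_pairs_post_process(state: StateInline) -> None:
    """Illustrative no-op; a real pairs plugin would walk state.delimiters here."""


md = MarkdownIt()
# Before this commit the anchor rule was named "text_collapse":
#   md.inline.ruler2.before("text_collapse", "my_pairs_rule", my_pairs_post_process)
md.inline.ruler2.before("fragments_join", "my_pairs_rule", my_pairs_post_process)
```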
4 changes: 2 additions & 2 deletions markdown_it/presets/commonmark.py
@@ -40,7 +40,7 @@ def make() -> PresetType:
"highlight": None,
},
"components": {
"core": {"rules": ["normalize", "block", "inline"]},
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
"block": {
"rules": [
"blockquote",
@@ -68,7 +68,7 @@ def make() -> PresetType:
"newline",
"text",
],
"rules2": ["balance_pairs", "emphasis", "text_collapse"],
"rules2": ["balance_pairs", "emphasis", "fragments_join"],
},
},
}
7 changes: 5 additions & 2 deletions markdown_it/presets/zero.py
@@ -33,8 +33,11 @@ def make() -> PresetType:
"highlight": None,
},
"components": {
"core": {"rules": ["normalize", "block", "inline"]},
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
"block": {"rules": ["paragraph"]},
"inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]},
"inline": {
"rules": ["text"],
"rules2": ["balance_pairs", "fragments_join"],
},
},
}
2 changes: 2 additions & 0 deletions markdown_it/rules_core/__init__.py
@@ -6,6 +6,7 @@
"replace",
"smartquotes",
"linkify",
"text_join",
)

from .block import block
@@ -15,3 +16,4 @@
from .replacements import replace
from .smartquotes import smartquotes
from .state_core import StateCore
from .text_join import text_join
34 changes: 34 additions & 0 deletions markdown_it/rules_core/text_join.py
@@ -0,0 +1,34 @@
"""Join raw text tokens with the rest of the text
This is set as a separate rule to provide an opportunity for plugins
to run text replacements after text join, but before escape join.
For example, `\\:)` shouldn't be replaced with an emoji.
"""
from __future__ import annotations

from ..token import Token
from .state_core import StateCore


def text_join(state: StateCore) -> None:
"""Join raw text for escape sequences (`text_special`) tokens with the rest of the text"""

for inline_token in state.tokens[:]:
if inline_token.type != "inline":
continue

# convert text_special to text and join all adjacent text nodes
new_tokens: list[Token] = []
for child_token in inline_token.children or []:
if child_token.type == "text_special":
child_token.type = "text"
if (
child_token.type == "text"
and new_tokens
and new_tokens[-1].type == "text"
):
new_tokens[-1].content += child_token.content
else:
new_tokens.append(child_token)
inline_token.children = new_tokens
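As the module docstring says, splitting this out as its own core rule lets a plugin run text replacements after `inline` but before `text_join`, while escaped sequences are still isolated as `text_special` tokens. A rough sketch of that pattern (the `smiley_replace` rule is invented for illustration, not part of the library):

```python
from markdown_it import MarkdownIt
from markdown_it.rules_core.state_core import StateCore


def smiley_replace(state: StateCore) -> None:
    for token in state.tokens:
        if token.type != "inline" or not token.children:
            continue
        for child in token.children:
            # Escaped sequences are still "text_special" here, so "\:)" is skipped.
            if child.type == "text":
                child.content = child.content.replace(":)", "\N{SLIGHTLY SMILING FACE}")


md = MarkdownIt()
md.core.ruler.before("text_join", "smiley_replace", smiley_replace)
print(md.render(":) and \\:)"))
# expected: <p>🙂 and :)</p>
```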
4 changes: 2 additions & 2 deletions markdown_it/rules_inline/__init__.py
@@ -1,7 +1,7 @@
__all__ = (
"StateInline",
"text",
"text_collapse",
"fragments_join",
"link_pairs",
"escape",
"newline",
@@ -20,10 +20,10 @@
from .balance_pairs import link_pairs
from .entity import entity
from .escape import escape
from .fragments_join import fragments_join
from .html_inline import html_inline
from .image import image
from .link import link
from .newline import newline
from .state_inline import StateInline
from .text import text
from .text_collapse import text_collapse
91 changes: 52 additions & 39 deletions markdown_it/rules_inline/escape.py
@@ -4,6 +4,58 @@
from ..common.utils import isStrSpace
from .state_inline import StateInline


def escape(state: StateInline, silent: bool) -> bool:
"""Process escaped chars and hardbreaks."""
pos = state.pos
maximum = state.posMax

if state.src[pos] != "\\":
return False

pos += 1

# '\' at the end of the inline block
if pos >= maximum:
return False

ch1 = state.src[pos]
ch1_ord = ord(ch1)
if ch1 == "\n":
if not silent:
state.push("hardbreak", "br", 0)
pos += 1
# skip leading whitespaces from next line
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch):
break
pos += 1

state.pos = pos
return True

escapedStr = state.src[pos]

if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum:
ch2 = state.src[pos + 1]
ch2_ord = ord(ch2)
if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF:
escapedStr += ch2
pos += 1

origStr = "\\" + escapedStr

if not silent:
token = state.push("text_special", "", 0)
token.content = escapedStr if ch1 in _ESCAPED else origStr
token.markup = origStr
token.info = "escape"

state.pos = pos + 1
return True


_ESCAPED = {
"!",
'"',
@@ -38,42 +90,3 @@
"}",
"~",
}


def escape(state: StateInline, silent: bool) -> bool:
pos = state.pos
maximum = state.posMax

if state.src[pos] != "\\":
return False

pos += 1

if pos < maximum:
ch = state.src[pos]

if ch in _ESCAPED:
if not silent:
state.pending += state.src[pos]
state.pos += 2
return True

if ch == "\n":
if not silent:
state.push("hardbreak", "br", 0)

pos += 1
# skip leading whitespaces from next line
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch):
break
pos += 1

state.pos = pos
return True

if not silent:
state.pending += "\\"
state.pos += 1
return True
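The practical change here is that an escape now emits a discrete `text_special` token (carrying the original `markup` and `info == "escape"`) instead of appending straight to `state.pending`; the `text_join` core rule later folds it back into plain text. A quick way to observe the merged result, with the output given as an expectation rather than a verified transcript:

```python
from markdown_it import MarkdownIt

md = MarkdownIt()
tokens = md.parse(r"foo \* bar")
inline = next(t for t in tokens if t.type == "inline")
# By the time parsing finishes, text_join has converted the text_special
# token produced for "\*" back into text and merged the fragments into one.
print([(t.type, t.content) for t in inline.children])
# expected: [('text', 'foo * bar')]
```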
markdown_it/rules_inline/{text_collapse.py → fragments_join.py}
@@ -1,7 +1,7 @@
from .state_inline import StateInline


def text_collapse(state: StateInline) -> None:
def fragments_join(state: StateInline) -> None:
"""
Clean up tokens after emphasis and strikethrough postprocessing:
merge adjacent text nodes into one and re-calculate all token levels
35 changes: 18 additions & 17 deletions tests/test_api/test_main.py
@@ -13,6 +13,7 @@ def test_get_rules():
"linkify",
"replacements",
"smartquotes",
"text_join",
],
"block": [
"table",
@@ -40,21 +41,21 @@ def test_get_rules():
"html_inline",
"entity",
],
"inline2": ["balance_pairs", "strikethrough", "emphasis", "text_collapse"],
"inline2": ["balance_pairs", "strikethrough", "emphasis", "fragments_join"],
}


def test_load_presets():
md = MarkdownIt("zero")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md = MarkdownIt("commonmark")
assert md.get_active_rules() == {
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"block": [
"code",
"fence",
@@ -79,7 +80,7 @@ def test_load_presets():
"html_inline",
"entity",
],
"inline2": ["balance_pairs", "emphasis", "text_collapse"],
"inline2": ["balance_pairs", "emphasis", "fragments_join"],
}


@@ -94,33 +95,33 @@ def test_enable():
md = MarkdownIt("zero").enable("heading")
assert md.get_active_rules() == {
"block": ["heading", "paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md.enable(["backticks", "autolink"])
assert md.get_active_rules() == {
"block": ["heading", "paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text", "backticks", "autolink"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


def test_disable():
md = MarkdownIt("zero").disable("inline")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md.disable(["text"])
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": [],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


@@ -130,15 +131,15 @@ def test_reset():
md.disable("inline")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


9 changes: 8 additions & 1 deletion tests/test_port/fixtures/linkify.md
@@ -96,4 +96,11 @@ after
<p>before</p>
<p><a href="http://github.com">github.com</a></p>
<p>after</p>
.
.

Don't match escaped
.
google\.com
.
<p>google.com</p>
.
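The new case relies on the escaped dot still being a separate `text_special` token when the `linkify` core rule runs, so no link is matched. A sketch of reproducing both fixture outputs (assumes `linkify-it-py` is installed; the preset/option combination is one plausible setup, not the harness the tests use):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable("linkify")
print(md.render("github.com"), end="")    # expected: <p><a href="http://github.com">github.com</a></p>
print(md.render(r"google\.com"), end="")  # expected: <p>google.com</p>
```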
15 changes: 14 additions & 1 deletion tests/test_port/fixtures/smartquotes.md
@@ -163,4 +163,17 @@ Should parse quotes adjacent to inline html, #677:
.
<p>“test <br>”</p>
<p>“<br> test”</p>
.
.

Should be escapable:
.
"foo"

\"foo"

"foo\"
.
<p>“foo”</p>
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.
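Likewise for smartquotes: the escaped quote is still `text_special` when the rule runs, so it is never paired. A sketch of the new cases (the preset and options here are assumptions; the tests use their own fixture harness):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"typographer": True}).enable("smartquotes")
print(md.render('"foo"'), end="")    # expected: <p>“foo”</p>
print(md.render('\\"foo"'), end="")  # expected: <p>&quot;foo&quot;</p>
```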