Skip to content

Commit

Permalink
👌 Improve nested emphasis parsing (#273)
Browse files Browse the repository at this point in the history
This fixes quadratic complexity in e.g. `**<...>**a**<...>**`

Implementation of upstream commit: markdown-it/markdown-it@24abaa5
  • Loading branch information
chrisjsewell authored Jun 2, 2023
1 parent 4e6dfd5 commit eb96da1
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 26 deletions.
6 changes: 3 additions & 3 deletions markdown_it/port.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
- package: markdown-it/markdown-it
version: 12.2.0
commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283
date: Aug 2, 2021
version: 12.3.0
commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8
date: Dec 9, 2021
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
Expand Down
43 changes: 32 additions & 11 deletions markdown_it/rules_inline/balance_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,33 @@

def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
"""For each opening emphasis-like marker find a matching closing one."""
if not delimiters:
return

openersBottom = {}
maximum = len(delimiters)

    # headerIdx is the index of the first delimiter of the current
    # delimiter run (the run that contains closer)
headerIdx = 0
lastTokenIdx = -2 # needs any value lower than -1
jumps: list[int] = []
closerIdx = 0
while closerIdx < maximum:
closer = delimiters[closerIdx]

jumps.append(0)

# markers belong to same delimiter run if:
# - they have adjacent tokens
# - AND markers are the same
#
if (
delimiters[headerIdx].marker != closer.marker
or lastTokenIdx != closer.token - 1
):
headerIdx = closerIdx
lastTokenIdx = closer.token

# Length is only used for emphasis-specific "rule of 3",
# if it's not defined (in strikethrough or 3rd party plugins),
# we can default it to 0 to disable those checks.
Expand All @@ -34,20 +54,15 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
(3 if closer.open else 0) + (closer.length % 3)
]

openerIdx = closerIdx - closer.jump - 1

# avoid crash if `closer.jump` is pointing outside of the array,
# e.g. for strikethrough
if openerIdx < -1:
openerIdx = -1
openerIdx = headerIdx - jumps[headerIdx] - 1

newMinOpenerIdx = openerIdx

while openerIdx > minOpenerIdx:
opener = delimiters[openerIdx]

if opener.marker != closer.marker:
openerIdx -= opener.jump + 1
openerIdx -= jumps[openerIdx] + 1
continue

if opener.open and opener.end < 0:
Expand All @@ -73,19 +88,25 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
# sure algorithm has linear complexity (see *_*_*_*_*_... case).
#
if openerIdx > 0 and not delimiters[openerIdx - 1].open:
lastJump = delimiters[openerIdx - 1].jump + 1
lastJump = jumps[openerIdx - 1] + 1
else:
lastJump = 0

closer.jump = closerIdx - openerIdx + lastJump
jumps[closerIdx] = closerIdx - openerIdx + lastJump
jumps[openerIdx] = lastJump

closer.open = False
opener.end = closerIdx
opener.jump = lastJump
opener.close = False
newMinOpenerIdx = -1

# treat next token as start of run,
# it optimizes skips in **<...>**a**<...>** pathological case
lastTokenIdx = -2

break

openerIdx -= opener.jump + 1
openerIdx -= jumps[openerIdx] + 1

if newMinOpenerIdx != -1:
# If match for this delimiter run failed, we want to set lower bound for
Expand Down
7 changes: 4 additions & 3 deletions markdown_it/rules_inline/emphasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ def tokenize(state: StateInline, silent: bool) -> bool:

scanned = state.scanDelims(state.pos, marker == "*")

for i in range(scanned.length):
for _ in range(scanned.length):
token = state.push("text", "", 0)
token.content = marker
state.delimiters.append(
Delimiter(
marker=ord(marker),
length=scanned.length,
jump=i,
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
Expand Down Expand Up @@ -63,9 +62,11 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
isStrong = (
i > 0
and delimiters[i - 1].end == startDelim.end + 1
# check that first two markers match and adjacent
and delimiters[i - 1].marker == startDelim.marker
and delimiters[i - 1].token == startDelim.token - 1
# check that last two markers are adjacent (we can safely assume they match)
and delimiters[startDelim.end + 1].token == endDelim.token + 1
and delimiters[i - 1].marker == startDelim.marker
)

ch = chr(startDelim.marker)
Expand Down
8 changes: 0 additions & 8 deletions markdown_it/rules_inline/state_inline.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,6 @@ class Delimiter:
# Total length of these series of delimiters.
length: int

# An amount of characters before this one that's equivalent to
# current one. In plain English: if this delimiter does not open
# an emphasis, neither do previous `jump` characters.
#
# Used to skip sequences like "*****" in one step, for 1st asterisk
# value will be 0, for 2nd it's 1 and so on.
jump: int

# A position of the token this delimiter corresponds to.
token: int

Expand Down
1 change: 0 additions & 1 deletion markdown_it/rules_inline/strikethrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def tokenize(state: StateInline, silent: bool) -> bool:
Delimiter(
marker=ord(ch),
length=0, # disable "rule of 3" length checks meant for emphasis
jump=i // 2, # for `~~` 1 marker = 2 characters
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
Expand Down

0 comments on commit eb96da1

Please sign in to comment.