Skip to content

Commit

Permalink
Fix autolink parsing regression.
Browse files Browse the repository at this point in the history
This affects autolinks with doubled internal line-ending punctuation characters.
Closes #151.
  • Loading branch information
jgm committed Mar 11, 2024
1 parent 1ec2293 commit 900179b
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 25 deletions.
43 changes: 18 additions & 25 deletions commonmark-extensions/src/Commonmark/Extensions/Autolink.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,30 +48,32 @@ validDomain = do
skipMany1 $ try (symbol '.' >> domainPart)

linkPath :: Monad m => Int -> InlineParser m ()
linkPath openParens =
linkPath openParens = optional $ do
Tok tt _ _ <- lookAhead anyTok
case tt of
Symbol '&' -> optional $
try (symbol '&' *>
notFollowedBy
(try (satisfyWord (const True) *> symbol ';' *> linkEnd)) *>
linkPath openParens)
<|> (pathPunctuation *> linkPath openParens)
<|> (symbol '(' *> linkPath (openParens + 1))
<|> (guard (openParens > 0) *> symbol ')' *> linkPath (openParens - 1))
-- the following clause is needed to implement the GFM spec, which allows
-- unbalanced ) except at link end. However, leaving this in causes
-- problematic interaction with explicit link syntax in certain odd cases (see #147).
-- <|> (notFollowedBy linkEnd *> symbol ')' *> linkPath (openParens - 1))
<|> (satisfyTok (\t -> case tokType t of
LineEnd -> False
Spaces -> False
Symbol c -> not (isTrailingPunctuation c || c == '&' || c == ')')
_ -> True) *> linkPath openParens)
<|> pure ()
Symbol '(' -> symbol '(' *> linkPath (openParens + 1)
Symbol ')' -> optional $ guard (openParens > 0) *> symbol ')' *> linkPath (openParens - 1)
Symbol '<' -> pure ()
Symbol c | isTrailingPunctuation c -> optional $
try (do skipMany1 trailingPunctuation
pos <- getPosition
linkPath openParens
pos' <- getPosition
guard (pos' > pos)) *> linkPath openParens
LineEnd -> pure ()
Spaces -> pure ()
_ -> anyTok *> linkPath openParens

linkEnd :: Monad m => InlineParser m ()
linkEnd = try $ skipMany trailingPunctuation *> (void whitespace <|> eof)

trailingPunctuation :: Monad m => InlineParser m ()
trailingPunctuation = void $
trailingPunctuation :: Monad m => InlineParser m Tok
trailingPunctuation =
satisfyTok (\t -> case tokType t of
Symbol c -> isTrailingPunctuation c
_ -> False)
Expand All @@ -80,15 +82,6 @@ isTrailingPunctuation :: Char -> Bool
isTrailingPunctuation =
(`elem` ['!', '"', '\'', ')', '*', ',', '.', ':', ';', '?', '_', '~', '<'])

pathPunctuation :: Monad m => InlineParser m ()
pathPunctuation = try $ do
satisfyTok (\t -> case tokType t of
Symbol c -> isTrailingPunctuation c && c /= ')' && c /= '<'
_ -> False)
void $ lookAhead (satisfyTok (\t -> case tokType t of
WordChars -> True
_ -> False))

urlAutolink :: Monad m => InlineParser m Text
urlAutolink = try $ do
satisfyWord (`elem` ["http", "https", "ftp"])
Expand Down
14 changes: 14 additions & 0 deletions commonmark-extensions/test/autolinks.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,17 @@ The autolinks extension should not interfere with regular links
.
<p><a href="http://www.google.com/">a link</a>stuff?</p>
````````````````````````````````

Autolinks with punctuation (#151):

```````````````````````````````` example
https://en.wikipedia.org/wiki/St._Petersburg_paradox
https://en.wikipedia.org/wiki/Liaison_(French)
https://en.wikipedia.org/wiki/Frederick_III,_German_Emperor
.
<p><a href="https://en.wikipedia.org/wiki/St._Petersburg_paradox">https://en.wikipedia.org/wiki/St._Petersburg_paradox</a></p>
<p><a href="https://en.wikipedia.org/wiki/Liaison_(French)">https://en.wikipedia.org/wiki/Liaison_(French)</a></p>
<p><a href="https://en.wikipedia.org/wiki/Frederick_III,_German_Emperor">https://en.wikipedia.org/wiki/Frederick_III,_German_Emperor</a></p>
````````````````````````````````

0 comments on commit 900179b

Please sign in to comment.