diff --git a/src/parser.ml b/src/parser.ml index 63429897..4b4437a0 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -904,33 +904,129 @@ module Pre = struct | Emph (_, _, Underscore, n) -> Text ([], String.make n '_') | R x -> x + let rec find_next_emph = function (* first Emph token in the list as Some (pre, post, style, n); None if the list holds no Emph *) + | Emph (pre, post, style, n) :: _ -> Some (pre, post, style, n) + | _ :: xs -> find_next_emph xs + | [] -> None + + let rec find_next_closer_emph = function (* same search, but only accepts an Emph for which is_closer holds *) + | (Emph (pre, post, style, n) as e) :: _ when is_closer e -> + Some (pre, post, style, n) + | _ :: xs -> find_next_closer_emph xs + | [] -> None + + (* Checks the lengths of two different emphasis delimiters to see if there can be a match. Returns false only when n1 + n2 is a multiple of 3 while neither n1 nor n2 is one; true otherwise. Whether a delimiter can both open and close is checked by the caller, not here. + + From the spec: "If one of the delimiters can both open and close emphasis, then the sum of the lengths + of the delimiter runs containing the opening and closing delimiters must not be + a multiple of 3 unless both lengths are multiples of 3" *) + let is_emph_match n1 n2 = + (* + - *foo**bar**baz* + + *foo** -> the second delimiter ** is both an opening and closing delimiter. + The sum of the length of both delimiters is 3, so they can't be matched. + + **bar** -> they are both opening and closing delimiters. + Their sum is 4 which is not a multiple of 3 so they can be matched to produce bar + + The end result is: foobarbaz (NOTE(review): the HTML tags in the outputs of these examples look stripped, e.g. this one presumably read <em>foo<strong>bar</strong>baz</em> - confirm against upstream) + + - *foo***bar**baz* + + *foo*** -> *** is both an opening and closing delimiter. + Their sum is 4 so they can be matched to produce: foo** + + **bar** -> they are both opening and closing delimiters. + Their sum is 4 which is not a multiple of 3 so they can be matched to produce bar + + The end result is: foobarbaz* + + - ***foo***bar**baz* + + ***foo*** -> the second delimiter *** is both an opening and closing delimiter. + Their sum is 6 which is a multiple of 3. However, both lengths are multiples of 3 + so they can be matched to produce: foo + + bar**baz* -> ** is both an opening and closing delimiter. 
+ Their sum is 3 so they can't be matched + + The end result is: foobar**baz* + *) + if (n1 + n2) mod 3 = 0 && n1 mod 3 != 0 && n2 mod 3 != 0 then false + else true (* NOTE(review): `!=` is physical inequality in OCaml; on ints it agrees with `<>`, which is the usual spelling *) + let rec parse_emph = function - | (Emph (pre, _, q1, n1) as x) :: xs when is_opener x -> + | (Emph (pre, _, q1, n1) as x1) :: xs when is_opener x1 -> (* x1 is an opener: loop scans forward for its closer, collecting the skipped tokens in acc in reverse order *) let rec loop acc = function - | (Emph (_, post, q2, n2) as x) :: xs when is_closer x && q1 = q2 -> - let xs = - if n1 >= 2 && n2 >= 2 then - if n2 > 2 then Emph (Punct, post, q2, n2 - 2) :: xs else xs - else if n2 > 1 then Emph (Punct, post, q2, n2 - 1) :: xs - else xs - in - let r = - let il = concat (List.map to_r (parse_emph (List.rev acc))) in - if n1 >= 2 && n2 >= 2 then R (Strong ([], il)) :: xs - else R (Emph ([], il)) :: xs - in - let r = - if n1 >= 2 && n2 >= 2 then - if n1 > 2 then Emph (pre, Punct, q1, n1 - 2) :: r else r - else if n1 > 1 then Emph (pre, Punct, q1, n1 - 1) :: r - else r + | (Emph (_, post, q2, n2) as x2) :: xs1 as xs + when is_closer x2 && q1 = q2 -> + (* At this point we have an opener followed by a closer. Both are of the same style (either * or _) *) + if (is_opener x2 || is_closer x1) && not (is_emph_match n1 n2) + then + (* + One of the two delimiters can both open and close (the closer is also an opener, or the opener is also a closer), and both delimiters don't match together, + according to the "mod 3" rule. In that case, we check if the next delimiter can match. + + *foo**bar**baz* The second delimiter that's both an opener/closer ( ** before bar) + matches with the next delimiter ( ** after bar). They'll become + bar. The end result will be: foobarbaz + + + *foo**bar*baz* The second delimiter that's both an opener/closer ( ** before bar) + doesn't match with the next delimiter ( * after bar). **bar will be + considered as regular text. 
The end result will be: foo**barbaz* + *) + match find_next_emph xs1 with + | Some (_, _, _, n3) when is_emph_match n3 n2 -> (* NOTE(review): only the length of the next Emph is tested here, not its style or whether it is a closer - confirm that is intended *) + let xs' = parse_emph xs in + loop acc xs' + | _ -> loop (x2 :: acc) xs1 (* otherwise keep x2 among the skipped tokens and continue after it *) + else + let xs = + if n1 >= 2 && n2 >= 2 then + if n2 > 2 then Emph (Other, post, q2, n2 - 2) :: xs1 (* NOTE(review): the strong branch rebuilds the leftover delimiter with Other while the single-emphasis branch uses Punct - confirm the asymmetry is intended *) + else xs1 + else if n2 > 1 then Emph (Punct, post, q2, n2 - 1) :: xs1 + else xs1 + in + let r = + let il = concat (List.map to_r (parse_emph (List.rev acc))) in + if n1 >= 2 && n2 >= 2 then R (Strong ([], il)) :: xs (* both runs have length >= 2: strong, consuming two delimiters on each side; otherwise emphasis, consuming one *) + else R (Emph ([], il)) :: xs + in + let r = + if n1 >= 2 && n2 >= 2 then + if n1 > 2 then Emph (pre, Other, q1, n1 - 2) :: r else r + else if n1 > 1 then Emph (pre, Punct, q1, n1 - 1) :: r + else r + in + parse_emph r + | (Emph (_, _, q2, _) as x2) :: xs1 as xs when is_opener x2 -> + (* + This case happens when we encounter a second opener delimiter. We look ahead for the next closer, + and if the next closer is of the same style, we can match them together. + + *foo _bar_ baz_ The second opener (_ before `bar`) is of the same style as the next closer + (_ after `bar`). We can match them to produce bar + The end result will be: *foo bar baz_ + + + *foo _bar* baz_ The second opener (_ before `bar`) is not of the same style as the next closer + ( * after `bar`). They can't be matched so we'll consider _bar as regular text. 
+ The end result will be: foo _bar baz_ + *) + let is_next_closer_same = + match find_next_closer_emph xs1 with + | None -> false + | Some (_, _, q3, _) -> q2 = q3 in - parse_emph r - | (Emph _ as x) :: xs1 as xs when is_opener x -> - let xs' = parse_emph xs in - if xs' = xs then loop (x :: acc) xs1 else loop acc xs' + if not is_next_closer_same then loop (x2 :: acc) xs1 + else + let xs' = parse_emph xs in + if xs' = xs then loop (x2 :: acc) xs1 else loop acc xs' (* parse_emph left the list unchanged: skip x2; otherwise resume on the rewritten list *) | x :: xs -> loop (x :: acc) xs - | [] -> x :: List.rev acc + | [] -> x1 :: List.rev acc (* no closer found: x1 is kept as-is, followed by the skipped tokens in their original order *) in loop [] xs | x :: xs -> x :: parse_emph xs diff --git a/tests/blackbox/emphasis.t b/tests/blackbox/emphasis.t new file mode 100644 index 00000000..2609ff0d --- /dev/null +++ b/tests/blackbox/emphasis.t @@ -0,0 +1,6 @@ +Extra test not covered by the conformance tests + + $ omd << "MD" + > foo**bar* + > MD +

foo**bar*

diff --git a/tests/dune.inc b/tests/dune.inc index c9e4868d..7d324cf6 100644 --- a/tests/dune.inc +++ b/tests/dune.inc @@ -5103,8 +5103,13 @@ (alias spec-407) (alias spec-408) (alias spec-409) + (alias spec-410) + (alias spec-411) (alias spec-412) (alias spec-413) + (alias spec-414) + (alias spec-415) + (alias spec-416) (alias spec-417) (alias spec-418) (alias spec-419) @@ -5116,6 +5121,7 @@ (alias spec-425) (alias spec-426) (alias spec-427) + (alias spec-428) (alias spec-429) (alias spec-430) (alias spec-431) @@ -5155,6 +5161,8 @@ (alias spec-465) (alias spec-466) (alias spec-467) + (alias spec-468) + (alias spec-469) (alias spec-470) (alias spec-471) (alias spec-472) diff --git a/tests/extract_tests.ml b/tests/extract_tests.ml index ce727c9e..218ae460 100644 --- a/tests/extract_tests.ml +++ b/tests/extract_tests.ml @@ -8,8 +8,7 @@ let protect ~finally f = finally (); r -let disabled = - [ 206; 215; 216; 410; 411; 414; 415; 416; 428; 468; 469; 519; 539 ] +let disabled = [ 206; 215; 216; 519; 539 ] (* NOTE(review): presumably the spec example numbers skipped when generating the test aliases - verify in the generator code *) let with_open_in fn f = let ic = open_in fn in