From db89e782f2d99851b7eb7c637e2d4124d66402be Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:13:46 +0300 Subject: [PATCH 1/8] tests: add baseline and failing. --- src/tests/core.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/tests/core.rs b/src/tests/core.rs index 3b5adefa..34c1643d 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -495,3 +495,35 @@ fn case_insensitive_safety() { "

a b c d e f g

\n", ); } + +#[test] +fn link_sourcepos_baseline() { + assert_ast_match!( + [], + "[ABCD](/)", + (document (1:1-1:9) [ + (paragraph (1:1-1:9) [ + (link (1:1-1:9) [ + (text (1:2-1:5) "ABCD") + ]) + ]) + ]) + ); +} + +#[test] +fn link_sourcepos_newline() { + assert_ast_match!( + [], + "[AB\nCD](/)", + (document (1:1-2:6) [ + (paragraph (1:1-2:6) [ + (link (1:1-2:6) [ + (text (1:2-1:3) "AB") + (softbreak (1:4-1:4)) + (text (2:1-2:2) "CD") + ]) + ]) + ]) + ); +} From 9a4262c1a195a54485be386fa143ad88fb8c568e Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:16:43 +0300 Subject: [PATCH 2/8] inlines: set not just SC but whole SP based on opener. --- src/parser/inlines.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index a22a4874..33407e4e 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1648,13 +1648,12 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { self.pos, self.pos, ); - inl.data.borrow_mut().sourcepos.start.column = self.brackets[brackets_len - 1] + inl.data.borrow_mut().sourcepos.start = self.brackets[brackets_len - 1] .inl_text .data .borrow() .sourcepos - .start - .column; + .start; inl.data.borrow_mut().sourcepos.end.column = usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize) .unwrap(); From 63b967e4769a6e7343764dcb835c9f59082a6c7f Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:26:10 +0300 Subject: [PATCH 3/8] tests: this last case is abysmal. Actually it looks like all the link stuff is just +2, probably because of the indent-but-not-really-indent. --- src/tests/core.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/tests/core.rs b/src/tests/core.rs index 34c1643d..4dfc46b0 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -500,7 +500,7 @@ fn case_insensitive_safety() { fn link_sourcepos_baseline() { assert_ast_match!( [], - "[ABCD](/)", + "[ABCD](/)\n", (document (1:1-1:9) [ (paragraph (1:1-1:9) [ (link (1:1-1:9) [ @@ -511,11 +511,12 @@ fn link_sourcepos_baseline() { ); } +// https://github.com/kivikakk/comrak/issues/301 #[test] fn link_sourcepos_newline() { assert_ast_match!( [], - "[AB\nCD](/)", + "[AB\nCD](/)\n", (document (1:1-2:6) [ (paragraph (1:1-2:6) [ (link (1:1-2:6) [ @@ -527,3 +528,26 @@ fn link_sourcepos_newline() { ]) ); } + +#[test] +fn link_sourcepos_truffle() { + assert_ast_match!( + [], + "- A\n[![B](/B.png)](/B)\n", + (document (1:1-2:18) [ + (list (1:1-2:18) [ + (item (1:1-2:18) [ + (paragraph (1:3-2:18) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:1-2:18) [ + (image (2:2-2:13) [ + (text (2:4-2:4) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} From c27194906ea3f05b9e071b20f47f0d7093a2b812 Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:31:39 +0300 Subject: [PATCH 4/8] tests: note this *works* and must remain. --- src/tests/core.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/tests/core.rs b/src/tests/core.rs index 4dfc46b0..5851233b 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -551,3 +551,26 @@ fn link_sourcepos_truffle() { ]) ); } + +#[test] +fn link_sourcepos_truffle_twist() { + assert_ast_match!( + [], + "- A\n [![B](/B.png)](/B)\n", + (document (1:1-2:20) [ + (list (1:1-2:20) [ + (item (1:1-2:20) [ + (paragraph (1:3-2:20) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:3-2:20) [ + (image (2:4-2:15) [ + (text (2:6-2:6) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} From 69ebedd45d169f23280263dc18db4635b203accd Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:43:31 +0300 Subject: [PATCH 5/8] tests: another case which misses. --- src/tests/core.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/tests/core.rs b/src/tests/core.rs index 5851233b..832eeec9 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -574,3 +574,26 @@ fn link_sourcepos_truffle_twist() { ]) ); } + +#[test] +fn link_sourcepos_truffle_bergamot() { + assert_ast_match!( + [], + "- A\n [![B](/B.png)](/B)\n", + (document (1:1-2:21) [ + (list (1:1-2:21) [ + (item (1:1-2:21) [ + (paragraph (1:3-2:21) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:4-2:21) [ + (image (2:5-2:16) [ + (text (2:7-2:7) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} From 21d97febda7b4c90bd4272c6971fb6a19bef600e Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 11:56:27 +0300 Subject: [PATCH 6/8] tests: ignore some failures. --- src/tests/core.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tests/core.rs b/src/tests/core.rs index 832eeec9..0ed9419b 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -529,6 +529,8 @@ fn link_sourcepos_newline() { ); } +// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. +#[ignore] #[test] fn link_sourcepos_truffle() { assert_ast_match!( @@ -575,6 +577,8 @@ fn link_sourcepos_truffle_twist() { ); } +// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. +#[ignore] #[test] fn link_sourcepos_truffle_bergamot() { assert_ast_match!( From cc17182c99da32c284ebe6aeb4db54b9710c185a Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 12:11:23 +0300 Subject: [PATCH 7/8] html: don't report sourcepos on inlines. The information is still there in the AST, and e.g. XML output will still include it. --- src/html.rs | 76 ++++++++++++++------------------- src/tests/escaped_char_spans.rs | 13 ++---- src/tests/fuzz.rs | 2 +- src/tests/wikilinks.rs | 13 +++--- 4 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/html.rs b/src/html.rs index 55bf5bc0..6c521720 100644 --- a/src/html.rs +++ b/src/html.rs @@ -725,33 +725,31 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Text(ref literal) => { + // No sourcepos. if entering { self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { + // No sourcepos. if entering { - self.output.write_all(b"\n")?; + self.output.write_all(b"
\n")?; } } NodeValue::SoftBreak => { + // No sourcepos. if entering { if self.options.render.hardbreaks { - self.output.write_all(b"\n")?; + self.output.write_all(b"
\n")?; } else { self.output.write_all(b"\n")?; } } } NodeValue::Code(NodeCode { ref literal, .. }) => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; self.escape(literal.as_bytes())?; self.output.write_all(b"")?; } @@ -773,52 +771,47 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Strong => { + // No sourcepos. let parent_node = node.parent(); if !self.options.render.gfm_quirks || (parent_node.is_none() || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong)) { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } } NodeValue::Emph => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Strikethrough => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Superscript => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Link(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Image(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteDefinition(ref nfd) => { + // No sourcepos. if entering { if self.footnote_ix == 0 { - self.output.write_all(b"\n
    \n")?; + .write_all(b"
    \n
      \n")?; } self.footnote_ix += 1; - self.output.write_all(b"")?; } else { @@ -970,18 +959,14 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteReference(ref nfr) => { + // No sourcepos. if entering { let mut ref_id = format!("fnref-{}", nfr.name); - - self.output.write_all(b" 1 { ref_id = format!("{}-{}", ref_id, nfr.ref_num); } - self.output - .write_all(b" class=\"footnote-ref\"> HtmlFormatter<'o, 'c> { } } NodeValue::Escaped => { + // No sourcepos. if self.options.render.escaped_char_spans { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } @@ -1040,10 +1024,9 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::WikiLink(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Underline => { + // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1062,6 +1046,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::SpoileredText => { + // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1069,6 +1054,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::EscapedTag(ref net) => { + // No sourcepos. self.output.write_all(net.as_bytes())?; } } diff --git a/src/tests/escaped_char_spans.rs b/src/tests/escaped_char_spans.rs index 7e2d747b..5b8bbf33 100644 --- a/src/tests/escaped_char_spans.rs +++ b/src/tests/escaped_char_spans.rs @@ -1,17 +1,10 @@ use super::*; use ntest::test_case; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. -#[test_case("\\@user", "

      @user

      \n")] -#[test_case("This\\@that", "

      This@that

      \n")] +#[test_case("\\@user", "

      @user

      \n")] +#[test_case("This\\@that", "

      This@that

      \n")] fn escaped_char_spans(markdown: &str, html: &str) { - html_opts!( - [render.escaped_char_spans, render.sourcepos], - markdown, - html - ); + html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip); } #[test_case("\\@user", "

      @user

      \n")] diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 47cd1fb6..21223729 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -47,7 +47,7 @@ fn footnote_def() { render.hardbreaks ], "\u{15}\u{b}\r[^ ]:", - "

      \u{15}\u{b}
      \n[^ ]:

      \n", + "

      \u{15}\u{b}
      \n[^ ]:

      \n", ); } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 19b14445..afcaa07e 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,20 +1,19 @@ use super::*; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( - [extension.wikilinks_title_after_pipe, render.sourcepos], + [extension.wikilinks_title_after_pipe], concat!("This is [[<script>alert(0)</script>|a <link]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); html_opts!( - [extension.wikilinks_title_before_pipe, render.sourcepos], + [extension.wikilinks_title_before_pipe], concat!("This is [[a <link|<script>alert(0)</script>]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); } From 983180b957e712b5f5280450a12fe4d66aaee38f Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 12:17:32 +0300 Subject: [PATCH 8/8] parser: document sourcepos reliability. --- src/parser/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0993dadf..b6987a97 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -755,9 +755,14 @@ pub struct RenderOptions { /// ``` pub list_style: ListStyleType, - /// Include source position attributes in XML output. + /// Include source position attributes in HTML and XML output. /// - /// Not yet compatible with extension.description_lists. + /// Sourcepos information is reliable for all core block items, and most + /// extensions. The description lists extension still has issues; see + /// . + /// + /// Sourcepos information is **not** reliable for inlines. See + /// for a discussion. /// /// ```rust /// # use comrak::{markdown_to_commonmark_xml, Options};