Skip to content

Commit

Permalink
html: only show inline sourcepos when asked for.
Browse files Browse the repository at this point in the history
  • Loading branch information
kivikakk committed Jul 12, 2024
1 parent c5f5f2d commit 1dc3fed
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 40 deletions.
84 changes: 66 additions & 18 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,32 +725,42 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Text(ref literal) => {
// Nowhere to put sourcepos.
if entering {
self.escape(literal.as_bytes())?;
}
}
NodeValue::LineBreak => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<br")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" />\n")?;
}
}
NodeValue::SoftBreak => {
// Unreliable sourcepos.
if entering {
if self.options.render.hardbreaks {
self.output.write_all(b"<br")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" />\n")?;
} else {
self.output.write_all(b"\n")?;
}
}
}
NodeValue::Code(NodeCode { ref literal, .. }) => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<code")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
self.escape(literal.as_bytes())?;
self.output.write_all(b"</code>")?;
Expand All @@ -773,51 +783,66 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Strong => {
// Unreliable sourcepos.
let parent_node = node.parent();
if !self.options.render.gfm_quirks
|| (parent_node.is_none()
|| !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong))
{
if entering {
self.output.write_all(b"<strong")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</strong>")?;
}
}
}
NodeValue::Emph => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<em")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</em>")?;
}
}
NodeValue::Strikethrough => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<del")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</del>")?;
}
}
NodeValue::Superscript => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<sup")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</sup>")?;
}
}
NodeValue::Link(ref nl) => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<a")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" href=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
Expand All @@ -833,9 +858,12 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Image(ref nl) => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<img")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" src=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
Expand All @@ -853,6 +881,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
#[cfg(feature = "shortcodes")]
NodeValue::ShortCode(ref nsc) => {
// Nowhere to put sourcepos.
if entering {
self.output.write_all(nsc.emoji.as_bytes())?;
}
Expand Down Expand Up @@ -970,16 +999,17 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::FootnoteReference(ref nfr) => {
// Unreliable sourcepos.
if entering {
let mut ref_id = format!("fnref-{}", nfr.name);

self.output.write_all(b"<sup")?;
self.render_sourcepos(node)?;

if nfr.ref_num > 1 {
ref_id = format!("{}-{}", ref_id, nfr.ref_num);
}

self.output.write_all(b"<sup")?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output
.write_all(b" class=\"footnote-ref\"><a href=\"#fn-")?;
self.escape_href(nfr.name.as_bytes())?;
Expand Down Expand Up @@ -1019,10 +1049,13 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Escaped => {
// Unreliable sourcepos.
if self.options.render.escaped_char_spans {
if entering {
self.output.write_all(b"<span data-escaped-char")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</span>")?;
Expand All @@ -1040,9 +1073,12 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::WikiLink(ref nl) => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<a")?;
self.render_sourcepos(node)?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" href=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
Expand All @@ -1055,20 +1091,31 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Underline => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<u>")?;
self.output.write_all(b"<u")?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b">")?;
} else {
self.output.write_all(b"</u>")?;
}
}
NodeValue::SpoileredText => {
// Unreliable sourcepos.
if entering {
self.output.write_all(b"<span class=\"spoiler\">")?;
self.output.write_all(b"<span")?;
if self.options.render.experimental_inline_sourcepos {
self.render_sourcepos(node)?;
}
self.output.write_all(b" class=\"spoiler\">")?;
} else {
self.output.write_all(b"</span>")?;
}
}
NodeValue::EscapedTag(ref net) => {
// Nowhere to put sourcepos.
self.output.write_all(net.as_bytes())?;
}
}
Expand Down Expand Up @@ -1128,7 +1175,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {

tag_attributes.push((String::from("data-math-style"), String::from(style_attr)));

if self.options.render.sourcepos {
// Unreliable sourcepos.
if self.options.render.experimental_inline_sourcepos && self.options.render.sourcepos {
let ast = node.data.borrow();
tag_attributes.push(("data-sourcepos".to_string(), ast.sourcepos.to_string()));
}
Expand Down
24 changes: 20 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -761,19 +761,35 @@ pub struct RenderOptions {
/// extensions. The description lists extension still has issues; see
/// <https://github.com/kivikakk/comrak/blob/3bb6d4ce/src/tests/description_lists.rs#L60-L125>.
///
/// Sourcepos information is **not** reliable for inlines. See
/// <https://github.com/kivikakk/comrak/pull/439> for a discussion.
/// Sourcepos information is **not** reliable for inlines, and is not
/// included in HTML without also setting
/// [`RenderOptions::experimental_inline_sourcepos`].
/// See <https://github.com/kivikakk/comrak/pull/439> for a discussion.
///
/// ```rust
/// # use comrak::{markdown_to_commonmark_xml, Options};
/// let mut options = Options::default();
/// options.render.sourcepos = true;
/// let input = "Hello *world*!";
/// let input = "## Hello world!";
/// let xml = markdown_to_commonmark_xml(input, &options);
/// assert!(xml.contains("<emph sourcepos=\"1:7-1:13\">"));
/// assert!(xml.contains("<text sourcepos=\"1:4-1:15\" xml:space=\"preserve\">"));
/// ```
pub sourcepos: bool,

/// Include sourcepos for inline nodes in HTML output. Inline sourcepos is
/// known to be unreliable; see <https://github.com/kivikakk/comrak/pull/439>
/// for a discussion.
///
/// ```rust
/// # use comrak::{markdown_to_html, Options};
/// let mut options = Options::default();
/// options.render.sourcepos = true;
/// let input = "Hello *world*!";
/// assert_eq!(markdown_to_html(input, &options),
/// "<p data-sourcepos=\"1:1-1:14\">Hello <em>world</em>!</p>\n");
/// options.render.experimental_inline_sourcepos = true;
/// assert_eq!(markdown_to_html(input, &options),
/// "<p data-sourcepos=\"1:1-1:14\">Hello <em data-sourcepos=\"1:7-1:13\">world</em>!</p>\n");
/// ```
pub experimental_inline_sourcepos: bool,

/// Wrap escaped characters in a `<span>` to allow any
/// post-processing to recognize them.
///
Expand Down
13 changes: 3 additions & 10 deletions src/tests/escaped_char_spans.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
use super::*;
use ntest::test_case;

// html_opts! does a roundtrip check unless sourcepos is set.
// These cases don't work roundtrip, because converting to commonmark
// automatically escapes certain characters.
#[test_case("\\@user", "<p data-sourcepos=\"1:1-1:6\"><span data-escaped-char data-sourcepos=\"1:1-1:2\">@</span>user</p>\n")]
#[test_case("This\\@that", "<p data-sourcepos=\"1:1-1:10\">This<span data-escaped-char data-sourcepos=\"1:5-1:6\">@</span>that</p>\n")]
#[test_case("\\@user", "<p><span data-escaped-char>@</span>user</p>\n")]
#[test_case("This\\@that", "<p>This<span data-escaped-char>@</span>that</p>\n")]
fn escaped_char_spans(markdown: &str, html: &str) {
html_opts!(
[render.escaped_char_spans, render.sourcepos],
markdown,
html
);
html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip);
}

#[test_case("\\@user", "<p>@user</p>\n")]
Expand Down
2 changes: 1 addition & 1 deletion src/tests/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn footnote_def() {
render.hardbreaks
],
"\u{15}\u{b}\r[^ ]:",
"<p data-sourcepos=\"1:1-2:5\">\u{15}\u{b}<br data-sourcepos=\"1:3-1:3\" />\n[^ ]:</p>\n",
"<p data-sourcepos=\"1:1-2:5\">\u{15}\u{b}<br />\n[^ ]:</p>\n",
);
}

Expand Down
13 changes: 6 additions & 7 deletions src/tests/wikilinks.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
use super::*;

// html_opts! does a roundtrip check unless sourcepos is set.
// These cases don't work roundtrip, because converting to commonmark
// automatically escapes certain characters.
#[test]
fn wikilinks_does_not_unescape_html_entities_in_link_label() {
html_opts!(
[extension.wikilinks_title_after_pipe, render.sourcepos],
[extension.wikilinks_title_after_pipe],
concat!("This is [[&lt;script&gt;alert(0)&lt;/script&gt;|a &lt;link]]",),
concat!("<p data-sourcepos=\"1:1-1:60\">This is <a data-sourcepos=\"1:9-1:60\" href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"),
concat!("<p>This is <a href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"),
no_roundtrip,
);

html_opts!(
[extension.wikilinks_title_before_pipe, render.sourcepos],
[extension.wikilinks_title_before_pipe],
concat!("This is [[a &lt;link|&lt;script&gt;alert(0)&lt;/script&gt;]]",),
concat!("<p data-sourcepos=\"1:1-1:60\">This is <a data-sourcepos=\"1:9-1:60\" href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"),
concat!("<p>This is <a href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"),
no_roundtrip,
);
}

Expand Down

0 comments on commit 1dc3fed

Please sign in to comment.