diff --git a/README.md b/README.md index 7c9fa346..cdeb220e 100644 --- a/README.md +++ b/README.md @@ -193,7 +193,7 @@ fn replace_text(document: &str, orig_string: &str, replacement: &str) -> String for node in root.descendants() { if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { // If the node is a text node, perform the string replacement. - *text = text.replace(orig_string, replacement) + *text = text.replace(orig_string, replacement); } } diff --git a/fuzz/fuzz_targets/all_options.rs b/fuzz/fuzz_targets/all_options.rs index 22e8f40d..131c932a 100644 --- a/fuzz/fuzz_targets/all_options.rs +++ b/fuzz/fuzz_targets/all_options.rs @@ -3,9 +3,10 @@ use libfuzzer_sys::fuzz_target; use comrak::{ - markdown_to_html, ExtensionOptions, Options, ParseOptions, - RenderOptions, ListStyleType, + markdown_to_html, BrokenLinkReference, ExtensionOptions, ListStyleType, Options, ParseOptions, + RenderOptions, ResolvedReference, }; +use std::sync::{Arc, Mutex}; fuzz_target!(|s: &str| { let mut extension = ExtensionOptions::default(); @@ -18,22 +19,29 @@ fuzz_target!(|s: &str| { extension.header_ids = Some("user-content-".to_string()); extension.footnotes = true; extension.description_lists = true; + extension.front_matter_delimiter = Some("---".to_string()); extension.multiline_block_quotes = true; extension.math_dollars = true; extension.math_code = true; - extension.front_matter_delimiter = Some("---".to_string()); extension.shortcodes = true; extension.wikilinks_title_after_pipe = true; extension.wikilinks_title_before_pipe = true; extension.underline = true; extension.spoiler = true; extension.greentext = true; - + let mut parse = ParseOptions::default(); parse.smart = true; parse.default_info_string = Some("rust".to_string()); parse.relaxed_tasklist_matching = true; parse.relaxed_autolinks = true; + let mut cb = |link_ref: BrokenLinkReference| { + Some(ResolvedReference { + url: link_ref.normalized.to_string(), + title: link_ref.original.to_string(), + }) 
+ }; + parse.broken_link_callback = Some(Arc::new(Mutex::new(&mut cb))); let mut render = RenderOptions::default(); render.hardbreaks = true; @@ -47,9 +55,15 @@ fuzz_target!(|s: &str| { render.escaped_char_spans = true; render.ignore_setext = true; render.ignore_empty_links = true; + render.gfm_quirks = true; + render.prefer_fenced = true; markdown_to_html( s, - &Options { extension, parse, render }, + &Options { + extension, + parse, + render, + }, ); }); diff --git a/fuzz/fuzz_targets/cli_default.rs b/fuzz/fuzz_targets/cli_default.rs index 91d703f9..a821c81d 100644 --- a/fuzz/fuzz_targets/cli_default.rs +++ b/fuzz/fuzz_targets/cli_default.rs @@ -2,9 +2,7 @@ use libfuzzer_sys::fuzz_target; -use comrak::{ - markdown_to_html_with_plugins, plugins::syntect::SyntectAdapter, Plugins, -}; +use comrak::{markdown_to_html_with_plugins, plugins::syntect::SyntectAdapter, Plugins}; // Note that we end up fuzzing Syntect here. diff --git a/fuzz/fuzz_targets/quadratic.rs b/fuzz/fuzz_targets/quadratic.rs index 4fcbe47a..2c0addbc 100644 --- a/fuzz/fuzz_targets/quadratic.rs +++ b/fuzz/fuzz_targets/quadratic.rs @@ -2,9 +2,8 @@ #![feature(int_roundings)] #![no_main] use comrak::{ - markdown_to_html, markdown_to_commonmark, markdown_to_commonmark_xml, - ExtensionOptions, Options, ParseOptions, - RenderOptions, ListStyleType, + markdown_to_commonmark, markdown_to_commonmark_xml, markdown_to_html, ExtensionOptions, + ListStyleType, Options, ParseOptions, RenderOptions, }; use libfuzzer_sys::arbitrary::{self, Arbitrary}; use libfuzzer_sys::fuzz_target; @@ -297,18 +296,10 @@ fn fuzz_one_input(input: &Input, num_bytes: usize) -> (usize, Duration, f64) { let duration_per_byte = duration.as_secs_f64() / (byte_length as f64); if DEBUG { - println!( - "do_one: {} bytes, duration = {:?}", - byte_length, - duration - ); + println!("do_one: {} bytes, duration = {:?}", byte_length, duration); } - ( - byte_length, - duration, - duration_per_byte - ) + (byte_length, duration, duration_per_byte) } 
/// The maximum number of steps to run in the main fuzzing loop below. diff --git a/src/cm.rs b/src/cm.rs index d251c320..7e1da213 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -47,9 +47,9 @@ pub fn format_document_with_plugins<'a>( Ok(()) } -struct CommonMarkFormatter<'a, 'o> { +struct CommonMarkFormatter<'a, 'o, 'c> { node: &'a AstNode<'a>, - options: &'o Options, + options: &'o Options<'c>, v: Vec, prefix: Vec, column: usize, @@ -72,7 +72,7 @@ enum Escaping { Title, } -impl<'a, 'o> Write for CommonMarkFormatter<'a, 'o> { +impl<'a, 'o, 'c> Write for CommonMarkFormatter<'a, 'o, 'c> { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.output(buf, false, Escaping::Literal); Ok(buf.len()) @@ -83,8 +83,8 @@ impl<'a, 'o> Write for CommonMarkFormatter<'a, 'o> { } } -impl<'a, 'o> CommonMarkFormatter<'a, 'o> { - fn new(node: &'a AstNode<'a>, options: &'o Options) -> Self { +impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { + fn new(node: &'a AstNode<'a>, options: &'o Options<'c>) -> Self { CommonMarkFormatter { node, options, diff --git a/src/html.rs b/src/html.rs index 94ac4698..55bf5bc0 100644 --- a/src/html.rs +++ b/src/html.rs @@ -63,25 +63,24 @@ impl<'w> Write for WriteWithLast<'w> { } } -/// Converts header Strings to canonical, unique, but still human-readable, anchors. +/// Converts header strings to canonical, unique, but still human-readable, +/// anchors. /// -/// To guarantee uniqueness, an anchorizer keeps track of the anchors -/// it has returned. So, for example, to parse several MarkDown -/// files, use a new anchorizer per file. +/// To guarantee uniqueness, an anchorizer keeps track of the anchors it has +/// returned; use one per output file. /// /// ## Example /// /// ``` -/// use comrak::Anchorizer; -/// +/// # use comrak::Anchorizer; /// let mut anchorizer = Anchorizer::new(); -/// /// // First "stuff" is unsuffixed. 
/// assert_eq!("stuff".to_string(), anchorizer.anchorize("Stuff".to_string())); /// // Second "stuff" has "-1" appended to make it unique. /// assert_eq!("stuff-1".to_string(), anchorizer.anchorize("Stuff".to_string())); /// ``` #[derive(Debug, Default)] +#[doc(hidden)] pub struct Anchorizer(HashSet); impl Anchorizer { @@ -96,12 +95,9 @@ impl Anchorizer { /// resultant anchor unique. /// /// ``` - /// use comrak::Anchorizer; - /// + /// # use comrak::Anchorizer; /// let mut anchorizer = Anchorizer::new(); - /// /// let source = "Ticks aren't in"; - /// /// assert_eq!("ticks-arent-in".to_string(), anchorizer.anchorize(source.to_string())); /// ``` pub fn anchorize(&mut self, header: String) -> String { @@ -130,9 +126,9 @@ impl Anchorizer { } } -struct HtmlFormatter<'o> { +struct HtmlFormatter<'o, 'c> { output: &'o mut WriteWithLast<'o>, - options: &'o Options, + options: &'o Options<'c>, anchorizer: Anchorizer, footnote_ix: u32, written_footnote_ix: u32, @@ -365,8 +361,12 @@ where Ok(()) } -impl<'o> HtmlFormatter<'o> { - fn new(options: &'o Options, output: &'o mut WriteWithLast<'o>, plugins: &'o Plugins) -> Self { +impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { + fn new( + options: &'o Options<'c>, + output: &'o mut WriteWithLast<'o>, + plugins: &'o Plugins, + ) -> Self { HtmlFormatter { options, output, diff --git a/src/lib.rs b/src/lib.rs index cf52f896..a5eb4407 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,7 @@ //! A 100% [CommonMark](http://commonmark.org/) and [GFM](https://github.github.com/gfm/) -//! compatible Markdown parser. Source repository is at . +//! compatible Markdown parser. //! -//! The design is based on [cmark-gfm](https://github.com/github/cmark-gfm), so -//! familiarity with that will help. +//! Source repository and detailed `README` is at . //! //! You can use `comrak::markdown_to_html` directly: //! @@ -20,32 +19,19 @@ //! use comrak::nodes::{AstNode, NodeValue}; //! //! # fn main() { -//! 
// The returned nodes are created in the supplied Arena, and are bound by its lifetime. //! let arena = Arena::new(); //! //! let root = parse_document( //! &arena, -//! "This is my input.\n\n1. Also my input.\n2. Certainly my input.\n", +//! "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n", //! &Options::default()); //! -//! fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) -//! where F : Fn(&'a AstNode<'a>) { -//! f(node); -//! for c in node.children() { -//! iter_nodes(c, f); +//! for node in root.descendants() { +//! if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { +//! *text = text.replace("my", "your"); //! } //! } //! -//! iter_nodes(root, &|node| { -//! match &mut node.data.borrow_mut().value { -//! &mut NodeValue::Text(ref mut text) => { -//! let orig = std::mem::replace(text, String::new()); -//! *text = orig.replace("my", "your"); -//! } -//! _ => (), -//! } -//! }); -//! //! let mut html = vec![]; //! format_html(root, &Options::default(), &mut html).unwrap(); //! @@ -53,8 +39,8 @@ //! String::from_utf8(html).unwrap(), //! "

<p>This is your input.</p>\n\
 //!      <ol>\n\
-//!      <li>Also your input.</li>\n\
-//!      <li>Certainly your input.</li>\n\
+//!      <li>Also <a href=\"#\">your</a> input.</li>\n\
+//!      <li>Certainly <em>your</em> input.</li>\n\
 //!      </ol>
\n"); //! # } //! ``` @@ -98,11 +84,15 @@ pub use cm::format_document as format_commonmark; pub use cm::format_document_with_plugins as format_commonmark_with_plugins; pub use html::format_document as format_html; pub use html::format_document_with_plugins as format_html_with_plugins; +#[doc(inline)] pub use html::Anchorizer; +#[allow(deprecated)] +pub use parser::parse_document_with_broken_link_callback; pub use parser::{ - parse_document, parse_document_with_broken_link_callback, ExtensionOptions, + parse_document, BrokenLinkCallback, BrokenLinkReference, ExtensionOptions, ExtensionOptionsBuilder, ListStyleType, Options, ParseOptions, ParseOptionsBuilder, Plugins, PluginsBuilder, RenderOptions, RenderOptionsBuilder, RenderPlugins, RenderPluginsBuilder, + ResolvedReference, }; pub use typed_arena::Arena; pub use xml::format_document as format_xml; @@ -111,9 +101,9 @@ pub use xml::format_document_with_plugins as format_xml_with_plugins; /// Legacy naming of [`ExtensionOptions`] pub type ComrakExtensionOptions = ExtensionOptions; /// Legacy naming of [`Options`] -pub type ComrakOptions = Options; +pub type ComrakOptions<'c> = Options<'c>; /// Legacy naming of [`ParseOptions`] -pub type ComrakParseOptions = ParseOptions; +pub type ComrakParseOptions<'c> = ParseOptions<'c>; /// Legacy naming of [`Plugins`] pub type ComrakPlugins<'a> = Plugins<'a>; /// Legacy naming of [`RenderOptions`] diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index f288ec50..a22a4874 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -8,7 +8,9 @@ use crate::nodes::{ use crate::parser::autolink; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; -use crate::parser::{unwrap_into_2, unwrap_into_copy, AutolinkType, Callback, Options, Reference}; +use crate::parser::{ + unwrap_into_2, unwrap_into_copy, AutolinkType, BrokenLinkReference, Options, ResolvedReference, +}; use crate::scanners; use crate::strings::{self, is_blank, Case}; use 
std::cell::{Cell, RefCell}; @@ -23,9 +25,9 @@ const MAXBACKTICKS: usize = 80; const MAX_LINK_LABEL_LENGTH: usize = 1000; const MAX_MATH_DOLLARS: usize = 2; -pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c: 'subj, 'subj> { +pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> { pub arena: &'a Arena>, - options: &'o Options, + options: &'o Options<'c>, pub input: &'i [u8], line: usize, pub pos: usize, @@ -43,10 +45,6 @@ pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c: 'subj, 'subj> { special_chars: [bool; 256], skip_chars: [bool; 256], smart_chars: [bool; 256], - // Need to borrow the callback from the parser only for the lifetime of the Subject, 'subj, and - // then give it back when the Subject goes out of scope. Needs to be a mutable reference so we - // can call the FnMut and let it mutate its captured variables. - callback: Option<&'subj mut Callback<'c>>, } #[derive(Default)] @@ -58,7 +56,7 @@ struct Flags { } pub struct RefMap { - pub map: HashMap, + pub map: HashMap, pub(crate) max_ref_size: usize, ref_size: usize, } @@ -72,7 +70,7 @@ impl RefMap { } } - fn lookup(&mut self, lab: &str) -> Option { + fn lookup(&mut self, lab: &str) -> Option { match self.map.get(lab) { Some(entry) => { let size = entry.url.len() + entry.title.len(); @@ -112,16 +110,15 @@ struct WikilinkComponents<'i> { link_label: Option<(&'i [u8], usize, usize)>, } -impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { +impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { pub fn new( arena: &'a Arena>, - options: &'o Options, + options: &'o Options<'c>, input: &'i [u8], line: usize, block_offset: usize, refmap: &'r mut RefMap, delimiter_arena: &'d Arena>, - callback: Option<&'subj mut Callback<'c>>, ) -> Self { let mut s = Subject { arena, @@ -143,7 +140,6 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { special_chars: [false; 256], skip_chars: [false; 256], smart_chars: [false; 256], - callback, }; for &c in &[ b'\n', b'\r', b'_', b'*', 
b'"', b'`', b'\\', b'&', b'<', b'[', b']', b'!', b'$', @@ -1530,6 +1526,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } // Need to normalize both to lookup in refmap and to call callback + let unfolded_lab = lab.to_owned(); let lab = strings::normalize_label(&lab, Case::Fold); let mut reff = if found_label { self.refmap.lookup(&lab) @@ -1539,8 +1536,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { // Attempt to use the provided broken link callback if a reference cannot be resolved if reff.is_none() { - if let Some(ref mut callback) = self.callback { - reff = callback(&lab).map(|(url, title)| Reference { url, title }); + if let Some(callback) = &self.options.parse.broken_link_callback { + reff = callback.lock().unwrap()(BrokenLinkReference { + normalized: &lab, + original: &unfolded_lab, + }); } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e9d68a3c..0993dadf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -22,9 +22,10 @@ use derive_builder::Builder; use std::cell::RefCell; use std::cmp::min; use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; +use std::fmt::{self, Debug, Formatter}; use std::mem; use std::str; +use std::sync::{Arc, Mutex}; use typed_arena::Arena; use crate::adapters::HeadingAdapter; @@ -57,55 +58,6 @@ pub fn parse_document<'a>( arena: &'a Arena>, buffer: &str, options: &Options, -) -> &'a AstNode<'a> { - parse_document_with_broken_link_callback(arena, buffer, options, None) -} - -/// Parse a Markdown document to an AST. -/// -/// In case the parser encounters any potential links that have a broken reference (e.g `[foo]` -/// when there is no `[foo]: url` entry at the bottom) the provided callback will be called with -/// the reference name, and the returned pair will be used as the link destination and title if not -/// None. 
-/// -/// **Note:** The label provided to the callback is the normalized representation of the label as -/// described in the [GFM spec](https://github.github.com/gfm/#matches). -/// -/// ``` -/// use comrak::{Arena, parse_document_with_broken_link_callback, format_html, Options}; -/// use comrak::nodes::{AstNode, NodeValue}; -/// -/// # fn main() -> std::io::Result<()> { -/// // The returned nodes are created in the supplied Arena, and are bound by its lifetime. -/// let arena = Arena::new(); -/// -/// let root = parse_document_with_broken_link_callback( -/// &arena, -/// "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.", -/// &Options::default(), -/// Some(&mut |link_ref: &str| match link_ref { -/// "foo" => Some(( -/// "https://www.rust-lang.org/".to_string(), -/// "The Rust Language".to_string(), -/// )), -/// _ => None, -/// }), -/// ); -/// -/// let mut output = Vec::new(); -/// format_html(root, &Options::default(), &mut output)?; -/// let output_str = std::str::from_utf8(&output).expect("invalid UTF-8"); -/// assert_eq!(output_str, "

<h1>Cool input!</h1>\n<p>Wow look at this cool \
-///                 <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \
-///                 A [broken link] renders as text.</p>

\n"); -/// # Ok(()) -/// # } -/// ``` -pub fn parse_document_with_broken_link_callback<'a, 'c>( - arena: &'a Arena>, - buffer: &str, - options: &Options, - callback: Option>, ) -> &'a AstNode<'a> { let root: &'a AstNode<'a> = arena.alloc(Node::new(RefCell::new(Ast { value: NodeValue::Document, @@ -116,13 +68,51 @@ pub fn parse_document_with_broken_link_callback<'a, 'c>( last_line_blank: false, table_visited: false, }))); - let mut parser = Parser::new(arena, root, options, callback); + let mut parser = Parser::new(arena, root, options); let mut linebuf = Vec::with_capacity(buffer.len()); parser.feed(&mut linebuf, buffer, true); parser.finish(linebuf) } -type Callback<'c> = &'c mut dyn FnMut(&str) -> Option<(String, String)>; +/// Parse a Markdown document to an AST, specifying +/// [`ParseOptions::broken_link_callback`]. +#[deprecated( + since = "0.25.0", + note = "The broken link callback has been moved into ParseOptions<'c>." +)] +pub fn parse_document_with_broken_link_callback<'a, 'c>( + arena: &'a Arena>, + buffer: &str, + options: &Options<'c>, + callback: Option>, +) -> &'a AstNode<'a> { + let mut options_with_callback = options.clone(); + options_with_callback.parse.broken_link_callback = callback.map(|cb| Arc::new(Mutex::new(cb))); + parse_document(arena, buffer, &options_with_callback) +} + +/// The type of the callback used when a reference link is encountered with no +/// matching reference. +/// +/// The details of the broken reference are passed in the +/// [`BrokenLinkReference`] argument. If a [`ResolvedReference`] is returned, it +/// is used as the link; otherwise, no link is made and the reference text is +/// preserved in its entirety. +pub type BrokenLinkCallback<'c> = + &'c mut dyn FnMut(BrokenLinkReference) -> Option; + +/// Struct to the broken link callback, containing details on the link reference +/// which failed to find a match. +#[derive(Debug)] +pub struct BrokenLinkReference<'l> { + /// The normalized reference link label. 
Unicode case folding is applied; + /// see for a + /// discussion on the details of what this exactly means. + pub normalized: &'l str, + + /// The original text in the link label. + pub original: &'l str, +} pub struct Parser<'a, 'o, 'c> { arena: &'a Arena>, @@ -143,19 +133,19 @@ pub struct Parser<'a, 'o, 'c> { last_line_length: usize, last_buffer_ended_with_cr: bool, total_size: usize, - options: &'o Options, - callback: Option>, + options: &'o Options<'c>, } #[derive(Default, Debug, Clone)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -/// Umbrella options struct. -pub struct Options { +/// Umbrella options struct. `'c` represents the lifetime of any callback +/// closure options may take. +pub struct Options<'c> { /// Enable CommonMark extensions. pub extension: ExtensionOptions, /// Configure parse-time options. - pub parse: ParseOptions, + pub parse: ParseOptions<'c>, /// Configure render-time options. pub render: RenderOptions, @@ -517,11 +507,11 @@ pub struct ExtensionOptions { } #[non_exhaustive] -#[derive(Default, Debug, Clone, Builder)] +#[derive(Default, Clone, Builder)] #[builder(default)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] /// Options for parser functions. -pub struct ParseOptions { +pub struct ParseOptions<'c> { /// Punctuation (quotes, full-stops and hyphens) are converted into 'smart' punctuation. /// /// ``` @@ -568,6 +558,66 @@ pub struct ParseOptions { /// "

[https://foo.com]

\n"); /// ``` pub relaxed_autolinks: bool, + + /// In case the parser encounters any potential links that have a broken + /// reference (e.g `[foo]` when there is no `[foo]: url` entry at the + /// bottom) the provided callback will be called with the reference name, + /// both in normalized form and unmodified, and the returned pair will be + /// used as the link destination and title if not [`None`]. + /// + /// ``` + /// # use std::{str, sync::{Arc, Mutex}}; + /// # use comrak::{Arena, ResolvedReference, parse_document, format_html, Options, BrokenLinkReference, ParseOptionsBuilder}; + /// # use comrak::nodes::{AstNode, NodeValue}; + /// # + /// # fn main() -> std::io::Result<()> { + /// let arena = Arena::new(); + /// let mut cb = |link_ref: BrokenLinkReference| match link_ref.normalized { + /// "foo" => Some(ResolvedReference { + /// url: "https://www.rust-lang.org/".to_string(), + /// title: "The Rust Language".to_string(), + /// }), + /// _ => None, + /// }; + /// let options = Options { + /// parse: ParseOptionsBuilder::default() + /// .broken_link_callback(Some(Arc::new(Mutex::new(&mut cb)))) + /// .build() + /// .unwrap(), + /// ..Default::default() + /// }; + /// + /// let root = parse_document( + /// &arena, + /// "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.", + /// &options, + /// ); + /// + /// let mut output = Vec::new(); + /// format_html(root, &Options::default(), &mut output)?; + /// assert_eq!(str::from_utf8(&output).unwrap(), + /// "

<h1>Cool input!</h1>\n<p>Wow look at this cool \
+    ///                 <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \
+    ///                 A [broken link] renders as text.</p>

\n"); + /// # Ok(()) + /// # } + #[cfg_attr(feature = "arbitrary", arbitrary(default))] + pub broken_link_callback: Option>>>, +} + +impl<'c> fmt::Debug for ParseOptions<'c> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let mut struct_fmt = f.debug_struct("ParseOptions"); + struct_fmt.field("smart", &self.smart); + struct_fmt.field("default_info_string", &self.default_info_string); + struct_fmt.field("relaxed_tasklist_matching", &self.relaxed_tasklist_matching); + struct_fmt.field("relaxed_autolinks", &self.relaxed_autolinks); + struct_fmt.field( + "broken_link_callback.is_some()", + &self.broken_link_callback.is_some(), + ); + struct_fmt.finish() + } } #[non_exhaustive] @@ -684,9 +734,9 @@ pub struct RenderOptions { /// Set the type of [bullet list marker](https://spec.commonmark.org/0.30/#bullet-list-marker) to use. Options are: /// - /// * `ListStyleType::Dash` to use `-` (default) - /// * `ListStyleType::Plus` to use `+` - /// * `ListStyleType::Star` to use `*` + /// * [`ListStyleType::Dash`] to use `-` (default) + /// * [`ListStyleType::Plus`] to use `+` + /// * [`ListStyleType::Star`] to use `*` /// /// ```rust /// # use comrak::{markdown_to_commonmark, Options, ListStyleType}; @@ -871,9 +921,13 @@ impl Debug for RenderPlugins<'_> { } } -#[derive(Clone)] -pub struct Reference { +/// A reference link's resolved details. +#[derive(Clone, Debug)] +pub struct ResolvedReference { + /// The destination URL of the reference link. pub url: String, + + /// The text of the link. 
pub title: String, } @@ -884,13 +938,8 @@ struct FootnoteDefinition<'a> { total_references: u32, } -impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { - fn new( - arena: &'a Arena>, - root: &'a AstNode<'a>, - options: &'o Options, - callback: Option>, - ) -> Self { +impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { + fn new(arena: &'a Arena>, root: &'a AstNode<'a>, options: &'o Options<'c>) -> Self { Parser { arena, refmap: RefMap::new(), @@ -911,7 +960,6 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { last_buffer_ended_with_cr: false, total_size: 0, options, - callback, } } @@ -2119,7 +2167,6 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { node_data.sourcepos.start.column - 1 + node_data.internal_offset, &mut self.refmap, &delimiter_arena, - self.callback.as_mut(), ); while subj.parse_inline(node) {} @@ -2374,7 +2421,6 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { 0, &mut self.refmap, &delimiter_arena, - self.callback.as_mut(), ); let mut lab: String = match subj.link_label() { @@ -2428,7 +2474,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { lab = strings::normalize_label(&lab, Case::Fold); if !lab.is_empty() { - subj.refmap.map.entry(lab).or_insert(Reference { + subj.refmap.map.entry(lab).or_insert(ResolvedReference { url: String::from_utf8(strings::clean_url(url)).unwrap(), title: String::from_utf8(strings::clean_title(&title)).unwrap(), }); @@ -2594,7 +2640,7 @@ pub enum AutolinkType { #[derive(Debug, Clone, Copy, Default)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -/// Options for bulleted list redering in markdown. See `link_style` in [RenderOptions] for more details. +/// Options for bulleted list redering in markdown. See `link_style` in [`RenderOptions`] for more details. 
pub enum ListStyleType { /// The `-` character #[default] diff --git a/src/tests/api.rs b/src/tests/api.rs index b7a4dcb1..50e9daaa 100644 --- a/src/tests/api.rs +++ b/src/tests/api.rs @@ -1,3 +1,7 @@ +use std::sync::{Arc, Mutex}; + +use parser::BrokenLinkReference; + use crate::{ adapters::{HeadingAdapter, HeadingMeta, SyntaxHighlighterAdapter}, nodes::Sourcepos, @@ -27,11 +31,22 @@ fn exercise_full_api() { let _: &AstNode = parse_document(&arena, "document", &default_options); + // Ensure the closure can modify its context. + let mut blr_ctx_0 = 0; + #[allow(deprecated)] let _: &AstNode = parse_document_with_broken_link_callback( &arena, "document", - &default_options, - Some(&mut |_: &str| Some(("abc".to_string(), "xyz".to_string()))), + &Options::default(), + Some(&mut |blr: BrokenLinkReference| { + blr_ctx_0 += 1; + let _: &str = blr.normalized; + let _: &str = blr.original; + Some(ResolvedReference { + url: String::new(), + title: String::new(), + }) + }), ); let mut extension = ExtensionOptionsBuilder::default(); @@ -60,6 +75,18 @@ fn exercise_full_api() { parse.default_info_string(Some("abc".to_string())); parse.relaxed_tasklist_matching(false); parse.relaxed_autolinks(false); + let mut blr_ctx_1 = 0; + parse.broken_link_callback(Some(Arc::new(Mutex::new( + &mut |blr: BrokenLinkReference| { + blr_ctx_1 += 1; + let _: &str = blr.normalized; + let _: &str = blr.original; + Some(ResolvedReference { + url: String::new(), + title: String::new(), + }) + }, + )))); let mut render = RenderOptionsBuilder::default(); render.hardbreaks(false); diff --git a/src/tests/options.rs b/src/tests/options.rs index 307c525c..88db857d 100644 --- a/src/tests/options.rs +++ b/src/tests/options.rs @@ -1,3 +1,5 @@ +use std::sync::{Arc, Mutex}; + use super::*; #[test] @@ -62,3 +64,38 @@ fn smart_chars() { "

<p>Hm. Hm.. hm… yes- indeed– quite—!</p>

\n", ); } + +#[test] +fn broken_link_callback() { + let arena = Arena::new(); + + let mut cb = |link_ref: BrokenLinkReference| match link_ref.normalized { + "foo" => Some(ResolvedReference { + url: "https://www.rust-lang.org/".to_string(), + title: "The Rust Language".to_string(), + }), + _ => None, + }; + let options = Options { + parse: ParseOptionsBuilder::default() + .broken_link_callback(Some(Arc::new(Mutex::new(&mut cb)))) + .build() + .unwrap(), + ..Default::default() + }; + + let root = parse_document( + &arena, + "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.", + &options, + ); + let mut output = Vec::new(); + format_html(root, &Options::default(), &mut output).unwrap(); + let output_str = std::str::from_utf8(&output).unwrap(); + assert_eq!( + output_str, + "

<h1>Cool input!</h1>\n<p>Wow look at this cool \
+        <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \
+        A [broken link] renders as text.</p>

\n" + ); +} diff --git a/src/xml.rs b/src/xml.rs index 76f5eb05..cfc4603d 100644 --- a/src/xml.rs +++ b/src/xml.rs @@ -30,15 +30,15 @@ pub fn format_document_with_plugins<'a>( XmlFormatter::new(options, output, plugins).format(root, false) } -struct XmlFormatter<'o> { +struct XmlFormatter<'o, 'c> { output: &'o mut dyn Write, - options: &'o Options, + options: &'o Options<'c>, _plugins: &'o Plugins<'o>, indent: u32, } -impl<'o> XmlFormatter<'o> { - fn new(options: &'o Options, output: &'o mut dyn Write, plugins: &'o Plugins) -> Self { +impl<'o, 'c> XmlFormatter<'o, 'c> { + fn new(options: &'o Options<'c>, output: &'o mut dyn Write, plugins: &'o Plugins) -> Self { XmlFormatter { options, output,