From be56d3484fe9c76e3f907d569bef9af9270a81a2 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 15 Sep 2022 13:43:22 +0200 Subject: [PATCH] refactor(rome_formatter): Move verbatim formatting to `rome_formatter` This PR moves the logic for formatting a node as `verbatim` to `rome_formatter` as it isn't JS specific --- crates/rome_formatter/src/lib.rs | 2 +- crates/rome_formatter/src/verbatim.rs | 173 +++++++ crates/rome_js_formatter/src/builders.rs | 552 +---------------------- crates/rome_js_formatter/src/lib.rs | 2 +- 4 files changed, 179 insertions(+), 550 deletions(-) create mode 100644 crates/rome_formatter/src/verbatim.rs diff --git a/crates/rome_formatter/src/lib.rs b/crates/rome_formatter/src/lib.rs index ff9bf3178ec..f8258fdad59 100644 --- a/crates/rome_formatter/src/lib.rs +++ b/crates/rome_formatter/src/lib.rs @@ -36,7 +36,7 @@ pub mod prelude; pub mod printed_tokens; pub mod printer; mod source_map; -pub mod token; +mod verbatim; use crate::formatter::Formatter; use crate::group_id::UniqueGroupIdBuilder; diff --git a/crates/rome_formatter/src/verbatim.rs b/crates/rome_formatter/src/verbatim.rs new file mode 100644 index 00000000000..dcf35785335 --- /dev/null +++ b/crates/rome_formatter/src/verbatim.rs @@ -0,0 +1,173 @@ +use crate::cst::{FormatLeadingComments, FormatTrailingComments}; +use crate::prelude::*; +use crate::VecBuffer; +use crate::{write, CstFormatContext}; +use rome_rowan::{Direction, Language, SyntaxElement, SyntaxNode, TextRange}; + +/// "Formats" a node according to its original formatting in the source text. Being able to format +/// a node "as is" is useful if a node contains syntax errors. Formatting a node with syntax errors +/// has the risk that Rome misinterprets the structure of the code and formatting it could +/// "mess up" the developer's, yet incomplete, work or accidentally introduce new syntax errors. +/// +/// You may be inclined to call `node.text` directly. 
However, using `text` doesn't track the node's +/// nor its children's source mapping information, resulting in incorrect source maps for this subtree. +/// +/// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand +/// if these nodes still need to have their own implementation. +pub fn format_verbatim_node(node: &SyntaxNode) -> FormatVerbatimNode { + FormatVerbatimNode { + node, + kind: VerbatimKind::Verbatim { + length: node.text_range().len(), + }, + format_comments: true, + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct FormatVerbatimNode<'node, L: Language> { + node: &'node SyntaxNode, + kind: VerbatimKind, + format_comments: bool, +} + +impl Format for FormatVerbatimNode<'_, Context::Language> +where + Context: CstFormatContext, +{ + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + for element in self.node.descendants_with_tokens(Direction::Next) { + match element { + SyntaxElement::Token(token) => f.state_mut().track_token(&token), + SyntaxElement::Node(node) => { + f.context().comments().mark_suppression_checked(&node); + } + } + } + + // The trimmed range of a node is its range without any of its leading or trailing trivia. + // Except for nodes that used to be parenthesized, the range that covers the source from the + // `(` to the `)` (the trimmed range of the parenthesized expression, not the inner expression) + let trimmed_source_range = f.context().source_map().map_or_else( + || self.node.text_trimmed_range(), + |source_map| source_map.trimmed_source_range(self.node), + ); + + let mut buffer = VecBuffer::new(f.state_mut()); + + write!( + buffer, + [format_with(|f: &mut Formatter| { + fn source_range(f: &Formatter, range: TextRange) -> TextRange + where + Context: CstFormatContext, + { + f.context() + .source_map() + .map_or_else(|| range, |source_map| source_map.source_range(range)) + } + + // Format all leading comments that are outside of the node's source range. 
+ if self.format_comments { + let comments = f.context().comments().clone(); + let leading_comments = comments.leading_comments(self.node); + + let outside_trimmed_range = leading_comments.partition_point(|comment| { + comment.piece().text_range().end() <= trimmed_source_range.start() + }); + + write!( + f, + [FormatLeadingComments::Comments( + &leading_comments[..outside_trimmed_range] + )] + )?; + } + + // Find the first skipped token trivia, if any, and include it in the verbatim range because + // the comments only format **up to** but not including skipped token trivia. + let start_source = self + .node + .first_leading_trivia() + .into_iter() + .flat_map(|trivia| trivia.pieces()) + .filter(|trivia| trivia.is_skipped()) + .map(|trivia| source_range(f, trivia.text_range()).start()) + .take_while(|start| *start < trimmed_source_range.start()) + .next() + .unwrap_or_else(|| trimmed_source_range.start()); + + let original_source = f.context().source_map().map_or_else( + || self.node.text_trimmed().to_string(), + |source_map| { + source_map.text()[trimmed_source_range.cover_offset(start_source)] + .to_string() + }, + ); + + dynamic_text( + &normalize_newlines(&original_source, LINE_TERMINATORS), + self.node.text_trimmed_range().start(), + ) + .fmt(f)?; + + // Format all trailing comments that are outside of the trimmed range. + if self.format_comments { + let comments = f.context().comments().clone(); + + let trailing_comments = comments.trailing_comments(self.node); + + let outside_trimmed_range_start = + trailing_comments.partition_point(|comment| { + source_range(f, comment.piece().text_range()).end() + <= trimmed_source_range.end() + }); + + write!( + f, + [FormatTrailingComments::Comments( + &trailing_comments[outside_trimmed_range_start..] 
+ )] + )?; + } + + Ok(()) + })] + )?; + + let content = buffer.into_vec(); + + let verbatim = Verbatim { + content: content.into_boxed_slice(), + kind: self.kind, + }; + + f.write_element(FormatElement::Verbatim(verbatim)) + } +} + +impl FormatVerbatimNode<'_, L> { + pub fn skip_comments(mut self) -> Self { + self.format_comments = false; + self + } +} + +/// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method +/// doesn't track nodes/tokens as [FormatElement::Verbatim]. They are just printed as they are. +pub fn format_unknown_node(node: &SyntaxNode) -> FormatVerbatimNode { + FormatVerbatimNode { + node, + kind: VerbatimKind::Unknown, + format_comments: true, + } +} + +/// Format a node having formatter suppression comment applied to it +pub fn format_suppressed_node(node: &SyntaxNode) -> FormatVerbatimNode { + FormatVerbatimNode { + node, + kind: VerbatimKind::Suppressed, + format_comments: true, + } +} diff --git a/crates/rome_js_formatter/src/builders.rs b/crates/rome_js_formatter/src/builders.rs index a25bd449673..8b3d5a05f91 100644 --- a/crates/rome_js_formatter/src/builders.rs +++ b/crates/rome_js_formatter/src/builders.rs @@ -1,12 +1,7 @@ use crate::prelude::*; use crate::AsFormat; -use rome_formatter::token::{FormatInserted, FormatInsertedCloseParen, FormatInsertedOpenParen}; -use rome_formatter::{ - format_args, write, Argument, Arguments, CstFormatContext, FormatContext, GroupId, - PreambleBuffer, VecBuffer, -}; -use rome_js_syntax::{JsLanguage, JsSyntaxKind, JsSyntaxNode, JsSyntaxToken}; -use rome_rowan::{AstNode, Direction, Language, SyntaxElement, SyntaxTriviaPiece, TextRange}; +use rome_js_syntax::JsLanguage; +use rome_rowan::AstNode; /// Formats a node using its [`AsFormat`] implementation but falls back to printing the node as /// it is in the source document if the formatting returns an [`FormatError`]. 
@@ -36,549 +31,10 @@ where Err(_) => { f.restore_state_snapshot(snapshot); - // Lists that yield errors are formatted as they were unknown nodes. + // Lists that yield errors are formatted as if they were suppressed nodes. // Doing so, the formatter formats the nodes/tokens as is. - format_unknown_node(self.node.syntax()).fmt(f) + format_suppressed_node(self.node.syntax()).fmt(f) } } } } - -pub fn format_inserted(kind: JsSyntaxKind) -> FormatInserted { - FormatInserted::new( - kind, - kind.to_string().expect("Expected a punctuation token"), - ) -} - -pub fn format_inserted_open_paren( - before_token: Option<&JsSyntaxToken>, - kind: JsSyntaxKind, -) -> FormatInsertedOpenParen { - FormatInsertedOpenParen::new( - before_token, - kind, - kind.to_string() - .expect("Expected a punctuation token as the open paren token."), - ) -} - -pub fn format_inserted_close_paren( - after_token: Option<&JsSyntaxToken>, - kind: JsSyntaxKind, - f: &mut JsFormatter, -) -> FormatInsertedCloseParen { - FormatInsertedCloseParen::after_token( - after_token, - kind, - kind.to_string() - .expect("Expected a punctuation token as the close paren token."), - f, - ) -} - -/// Adds parentheses around some content -/// Ensures that the leading trivia of the `first_content_token` is moved -/// before the opening parentheses and the trailing trivia of the `last_content_token` -/// is moved after the closing parentheses. 
-/// -/// # Examples -/// Adding parentheses around the string literal -/// -/// ```javascript -/// /* leading */ "test" /* trailing */; -/// ``` -/// -/// becomes -/// -/// ```javascript -/// /* leading */ ("test") /* trailing */; -/// ``` -pub fn format_parenthesize<'a, Content>( - first_content_token: Option<&'a JsSyntaxToken>, - content: &'a Content, - last_content_token: Option<&'a JsSyntaxToken>, -) -> FormatParenthesize<'a> -where - Content: Format, -{ - FormatParenthesize { - first_content_token, - content: Argument::new(content), - last_content_token, - grouped: false, - } -} - -/// Adds parentheses around an expression -#[derive(Clone)] -pub struct FormatParenthesize<'a> { - grouped: bool, - first_content_token: Option<&'a JsSyntaxToken>, - content: Argument<'a, JsFormatContext>, - last_content_token: Option<&'a JsSyntaxToken>, -} - -impl FormatParenthesize<'_> { - /// Groups the open parenthesis, the content, and the closing parenthesis inside of a group - /// and indents the content with a soft block indent. - pub fn grouped_with_soft_block_indent(mut self) -> Self { - self.grouped = true; - self - } -} - -impl Format for FormatParenthesize<'_> { - fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - let format_open_paren = - format_inserted_open_paren(self.first_content_token, JsSyntaxKind::L_PAREN); - let format_close_paren = - format_inserted_close_paren(self.last_content_token, JsSyntaxKind::R_PAREN, f); - - if self.grouped { - write!( - f, - [group(&format_args![ - format_open_paren, - soft_block_indent(&Arguments::from(&self.content)), - format_close_paren - ])] - ) - } else { - write!( - f, - [ - format_open_paren, - Arguments::from(&self.content), - format_close_paren - ] - ) - } - } -} - -/// "Formats" a node according to its original formatting in the source text. Being able to format -/// a node "as is" is useful if a node contains syntax errors. 
Formatting a node with syntax errors -/// has the risk that Rome misinterprets the structure of the code and formatting it could -/// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors. -/// -/// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes -/// nor its children source mapping information, resulting in incorrect source maps for this subtree. -/// -/// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand -/// if these nodes still need to have their own implementation. -pub fn format_verbatim_node(node: &JsSyntaxNode) -> FormatVerbatimNode { - FormatVerbatimNode { - node, - kind: VerbatimKind::Verbatim { - length: node.text_range().len(), - }, - } -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub struct FormatVerbatimNode<'node> { - node: &'node JsSyntaxNode, - kind: VerbatimKind, -} -impl Format for FormatVerbatimNode<'_> { - fn fmt(&self, f: &mut JsFormatter) -> FormatResult<()> { - for element in self.node.descendants_with_tokens(Direction::Next) { - match element { - SyntaxElement::Token(token) => f.state_mut().track_token(&token), - SyntaxElement::Node(node) => { - f.context().comments().mark_suppression_checked(&node); - } - } - } - - fn skip_whitespace(piece: &SyntaxTriviaPiece) -> bool { - piece.is_newline() || piece.is_whitespace() - } - - fn write_trivia_token( - f: &mut JsFormatter, - piece: SyntaxTriviaPiece, - ) -> FormatResult<()> { - syntax_token_cow_slice( - normalize_newlines(piece.text(), LINE_TERMINATORS), - &piece.token(), - piece.text_range().start(), - ) - .fmt(f) - } - - let trimmed_source_range = f.context().source_map().map_or_else( - || self.node.text_trimmed_range(), - |source_map| source_map.trimmed_source_range(self.node), - ); - - let mut buffer = VecBuffer::new(f.state_mut()); - - write!( - buffer, - [format_with(|f: &mut JsFormatter| { - fn source_range(f: &JsFormatter, range: TextRange) -> TextRange { - 
f.context() - .source_map() - .map_or_else(|| range, |source_map| source_map.source_range(range)) - } - - for leading_trivia in self - .node - .first_leading_trivia() - .into_iter() - .flat_map(|trivia| trivia.pieces()) - .skip_while(skip_whitespace) - { - let trivia_source_range = source_range(f, leading_trivia.text_range()); - - if trivia_source_range.start() >= trimmed_source_range.start() { - break; - } - - write_trivia_token(f, leading_trivia)?; - } - - let original_source = f.context().source_map().map_or_else( - || self.node.text_trimmed().to_string(), - |source_map| source_map.text()[trimmed_source_range].to_string(), - ); - - dynamic_text( - &normalize_newlines(&original_source, LINE_TERMINATORS), - self.node.text_trimmed_range().start(), - ) - .fmt(f)?; - - let mut trailing_trivia = self - .node - .last_trailing_trivia() - .into_iter() - .flat_map(|trivia| trivia.pieces()); - - let mut trailing_back = trailing_trivia.by_ref().rev().peekable(); - - while let Some(trailing) = trailing_back.peek() { - let is_whitespace = skip_whitespace(trailing); - - let trailing_source_range = source_range(f, trailing.text_range()); - let is_in_trimmed_range = - trailing_source_range.start() < trimmed_source_range.end(); - - if is_whitespace || is_in_trimmed_range { - trailing_back.next(); - } else { - break; - } - } - - for trailing_trivia in trailing_trivia { - write_trivia_token(f, trailing_trivia)?; - } - - Ok(()) - })] - )?; - - let content = buffer.into_vec(); - - let verbatim = Verbatim { - content: content.into_boxed_slice(), - kind: self.kind, - }; - - f.write_element(FormatElement::Verbatim(verbatim)) - } -} - -/// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method -/// doesn't track nodes/tokens as [FormatElement::Verbatim]. They are just printed as they are. 
-pub fn format_unknown_node(node: &JsSyntaxNode) -> FormatUnknownNode { - FormatUnknownNode { node } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct FormatUnknownNode<'node> { - node: &'node JsSyntaxNode, -} - -impl Format for FormatUnknownNode<'_> { - fn fmt(&self, f: &mut JsFormatter) -> FormatResult<()> { - FormatVerbatimNode { - node: self.node, - kind: VerbatimKind::Unknown, - } - .fmt(f) - } -} - -/// Format a node having formatter suppression comment applied to it -pub fn format_suppressed_node(node: &JsSyntaxNode) -> FormatSuppressedNode { - FormatSuppressedNode { node } -} - -#[derive(Debug, Clone)] -pub struct FormatSuppressedNode<'node> { - node: &'node JsSyntaxNode, -} - -impl Format for FormatSuppressedNode<'_> { - fn fmt(&self, f: &mut JsFormatter) -> FormatResult<()> { - // Insert a force a line break to ensure the suppression comment is on its own line - // and correctly registers as a leading trivia on the opening token of this node - write!( - f, - [ - hard_line_break(), - FormatVerbatimNode { - node: self.node, - kind: VerbatimKind::Suppressed - } - ] - ) - } -} - -/// Formats a group delimited by an opening and closing token, -/// such as a function body delimited by '{' and '}' tokens -/// -/// Calling this method is required to correctly handle the comments attached -/// to the opening and closing tokens and insert them inside the group block -pub fn format_delimited<'a, 'content>( - open_token: &'a JsSyntaxToken, - content: &'content impl Format, - close_token: &'a JsSyntaxToken, -) -> FormatDelimited<'a, 'content> { - FormatDelimited { - open_token, - content: Argument::new(content), - close_token, - mode: DelimitedMode::SoftBlockIndent(None), - grouped: true, - } -} - -#[derive(Copy, Clone)] -pub struct FormatDelimited<'a, 'content> { - open_token: &'a JsSyntaxToken, - content: Argument<'content, JsFormatContext>, - close_token: &'a JsSyntaxToken, - mode: DelimitedMode, - grouped: bool, -} - -impl FormatDelimited<'_, '_> { - 
fn with_mode(mut self, mode: DelimitedMode) -> Self { - self.mode = mode; - self - } - - /// Formats a group delimited by an opening and closing token, placing the - /// content in a [block_indent] group - pub fn block_indent(self) -> Self { - self.with_mode(DelimitedMode::BlockIndent) - } - - /// Formats a group delimited by an opening and closing token, placing the - /// content in a [soft_block_indent] group - pub fn soft_block_indent(self) -> Self { - self.with_mode(DelimitedMode::SoftBlockIndent(None)) - } - - /// Formats a group delimited by an opening and closing token, placing the - /// content in an [indent] group with [soft_line_break_or_space] tokens at the - /// start and end - pub fn soft_block_spaces(self) -> Self { - self.with_mode(DelimitedMode::SoftBlockSpaces(None)) - } - - pub fn soft_block_indent_with_group_id(self, group_id: Option) -> Self { - self.with_mode(DelimitedMode::SoftBlockIndent(group_id)) - } - - /// Prevents the formatter from grouping the content even in soft block or soft block spaces mode. 
- pub fn ungrouped(mut self) -> Self { - self.grouped = false; - self - } -} - -impl Format for FormatDelimited<'_, '_> { - fn fmt(&self, f: &mut JsFormatter) -> FormatResult<()> { - let FormatDelimited { - open_token, - close_token, - content, - mode, - grouped, - } = self; - - let open_delimiter = format_open_delimiter(open_token); - let close_delimiter = format_close_delimiter(close_token); - - open_delimiter.format_leading_trivia().fmt(f)?; - - let open_token_trailing_trivia = open_delimiter.format_trailing_trivia(); - - let close_token_leading_trivia = close_delimiter.format_leading_trivia(); - - let delimited = format_with(|f| { - open_delimiter.format_token().fmt(f)?; - - let format_content = format_with(|f| f.write_fmt(Arguments::from(content))); - - match mode { - DelimitedMode::BlockIndent => block_indent(&format_args![ - open_token_trailing_trivia, - format_content, close_token_leading_trivia - ]) - .fmt(f)?, - DelimitedMode::SoftBlockIndent(_) => soft_block_indent(&format_args![ - open_token_trailing_trivia, - format_content, close_token_leading_trivia - ]) - .fmt(f)?, - DelimitedMode::SoftBlockSpaces(_) => { - let mut is_empty = true; - - let format_content = format_once(|f| { - let mut recording = f.start_recording(); - - write!( - recording, - [ - open_token_trailing_trivia, - format_content, - close_token_leading_trivia - ] - )?; - - is_empty = recording.stop().is_empty(); - - Ok(()) - }); - - soft_line_indent_or_space(&format_content).fmt(f)?; - - if !is_empty { - soft_line_break_or_space().fmt(f)?; - } - } - }; - - close_delimiter.format_token().fmt(f) - }); - - match mode { - _ if !grouped => write!(f, [delimited])?, - // Group is useless, the block indent would expand it right anyway - DelimitedMode::SoftBlockIndent(group_id) | DelimitedMode::SoftBlockSpaces(group_id) => { - match group_id { - None => write!(f, [group(&delimited)])?, - Some(group_id) => { - write!(f, [group(&delimited).with_group_id(Some(*group_id))])? 
- } - } - } - DelimitedMode::BlockIndent => write!(f, [delimited])?, - }; - - write!(f, [format_trailing_trivia(close_token)]) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum DelimitedMode { - BlockIndent, - SoftBlockIndent(Option), - SoftBlockSpaces(Option), -} - -/// Use this function to create an open delimiter, where you can extract the formatting of -/// trivias and token, separately. -/// -/// This function assumes that you will use the token to replicate [format_delimited], which means -/// that it will add possible line breaks -pub(crate) fn format_open_delimiter(open_token: &JsSyntaxToken) -> OpenDelimiter { - OpenDelimiter::new(open_token) -} - -/// Use this function to create an close delimiter, where you can extract the formatting of -/// trivias and token, separately. -/// -/// This function assumes that you will use the token to replicate [format_delimited], which means -/// that it will add possible line breaks -pub(crate) fn format_close_delimiter(close_token: &JsSyntaxToken) -> CloseDelimiter { - CloseDelimiter::new(close_token) -} - -pub(crate) struct OpenDelimiter<'t> { - open_token: &'t JsSyntaxToken, -} - -impl<'t> OpenDelimiter<'t> { - pub(crate) fn new(open_token: &'t JsSyntaxToken) -> Self { - Self { open_token } - } - - /// It extracts the formatted leading trivia of the token, without writing it in the buffer - pub(crate) fn format_leading_trivia(&self) -> impl Format + 't { - format_leading_trivia(self.open_token) - } - - /// It extracts the formatted trailing trivia of the token, without writing it in the buffer - pub(crate) fn format_trailing_trivia(&self) -> impl Format + 't { - format_with(|f| { - let mut recording = f.start_recording(); - write!(recording, [format_trailing_trivia(self.open_token)])?; - let recorded = recording.stop(); - - if !recorded.is_empty() { - soft_line_break().fmt(f)?; - } - - Ok(()) - }) - } - - /// It extracts the formatted token, without writing it in the buffer - pub(crate) fn 
format_token(&self) -> impl Format + 't { - format_with(|f| { - f.state_mut().track_token(self.open_token); - write!(f, [format_trimmed_token(self.open_token)]) - }) - } -} - -pub(crate) struct CloseDelimiter<'t> { - close_token: &'t JsSyntaxToken, -} - -impl<'t> CloseDelimiter<'t> { - pub(crate) fn new(close_token: &'t JsSyntaxToken) -> Self { - Self { close_token } - } - - /// It extracts the formatted leading trivia of the token, without writing it in the buffer - pub(crate) fn format_trailing_trivia(&self) -> impl Format + 't { - format_trailing_trivia(self.close_token) - } - - /// It extracts the formatted trailing trivia of the token, without writing it in the buffer - pub(crate) fn format_leading_trivia(&self) -> impl Format + 't { - format_with(|f| { - let mut buffer = PreambleBuffer::new(f, soft_line_break()); - - write!(buffer, [format_leading_trivia(self.close_token)]) - }) - } - - /// It extracts the formatted token, without writing it in the buffer - pub(crate) fn format_token(&self) -> impl Format + 't { - format_with(|f| { - f.state_mut().track_token(self.close_token); - write!(f, [format_trimmed_token(self.close_token)]) - }) - } -} diff --git a/crates/rome_js_formatter/src/lib.rs b/crates/rome_js_formatter/src/lib.rs index 2de1638c79e..a4e823daa53 100644 --- a/crates/rome_js_formatter/src/lib.rs +++ b/crates/rome_js_formatter/src/lib.rs @@ -252,7 +252,7 @@ pub mod utils; mod check_reformat; #[rustfmt::skip] mod generated; -pub(crate) mod builders; +mod builders; pub mod comments; pub mod context; mod parentheses;