From cfe95874d1e7a33b7e3b5ccfe59efb7b0af39bd9 Mon Sep 17 00:00:00 2001 From: Chris Pryer Date: Sat, 26 Aug 2023 13:27:18 -0400 Subject: [PATCH 1/7] Use `LineSuffix.reserved_width` in `ruff_python_formatter` --- crates/ruff_formatter/src/builders.rs | 56 ++++- .../src/format_element/document.rs | 16 +- .../ruff_formatter/src/format_element/tag.rs | 11 +- crates/ruff_formatter/src/printer/mod.rs | 41 +++- .../test/fixtures/ruff/expression/tuple.py | 2 + .../src/comments/format.rs | 53 +++- .../src/expression/mod.rs | 11 +- ...patibility@simple_cases__comments6.py.snap | 11 +- ...atibility@simple_cases__expression.py.snap | 19 +- ...ity@simple_cases__power_op_spacing.py.snap | 232 ++++++++++++++++++ ...@simple_cases__remove_await_parens.py.snap | 13 +- ...ompatibility@simple_cases__torture.py.snap | 15 +- .../format@expression__tuple.py.snap | 6 + .../format@parentheses__call_chains.py.snap | 4 +- .../format@statement__delete.py.snap | 8 +- .../snapshots/format@statement__try.py.snap | 4 +- .../snapshots/format@statement__while.py.snap | 4 +- 17 files changed, 456 insertions(+), 50 deletions(-) create mode 100644 crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__power_op_spacing.py.snap diff --git a/crates/ruff_formatter/src/builders.rs b/crates/ruff_formatter/src/builders.rs index 2605bf87e6c9d..a0060452f661c 100644 --- a/crates/ruff_formatter/src/builders.rs +++ b/crates/ruff_formatter/src/builders.rs @@ -435,46 +435,78 @@ fn debug_assert_no_newlines(text: &str) { debug_assert!(!text.contains('\r'), "The content '{text}' contains an unsupported '\\r' line terminator character but text must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings."); } -/// Pushes some content to the end of the current line +/// Pushes some content to the end of the current line. /// /// ## Examples /// -/// ``` -/// use ruff_formatter::{format}; +/// ```rust +/// use ruff_formatter::format; /// use ruff_formatter::prelude::*; /// -/// fn main() -> FormatResult<()> { +/// # fn main() -> FormatResult<()> { /// let elements = format!(SimpleFormatContext::default(), [ /// text("a"), -/// line_suffix(&text("c")), +/// line_suffix(&text("c"), 0), /// text("b") /// ])?; /// -/// assert_eq!( -/// "abc", -/// elements.print()?.as_code() -/// ); +/// assert_eq!("abc", elements.print()?.as_code()); +/// # Ok(()) +/// # } +/// ``` +/// +/// Provide reserved width for the line suffix to include it during measurement. +/// ```rust +/// use ruff_formatter::{format, format_args, LineWidth, SimpleFormatContext, SimpleFormatOptions}; +/// use ruff_formatter::prelude::*; +/// +/// # fn main() -> FormatResult<()> { +/// let context = SimpleFormatContext::new(SimpleFormatOptions { +/// line_width: LineWidth::try_from(10).unwrap(), +/// ..SimpleFormatOptions::default() +/// }); +/// +/// let elements = format!(context, [ +/// // Breaks +/// group(&format_args![ +/// if_group_breaks(&text("(")), +/// soft_block_indent(&format_args![text("a"), line_suffix(&text(" // a comment"), 13)]), +/// if_group_breaks(&text(")")) +/// ]), +/// +/// // Fits +/// group(&format_args![ +/// if_group_breaks(&text("(")), +/// soft_block_indent(&format_args![text("a"), line_suffix(&text(" // a comment"), 0)]), +/// if_group_breaks(&text(")")) +/// ]), +/// ])?; +/// # assert_eq!("(\n\ta // a comment\n)a // a comment", elements.print()?.as_code()); /// # Ok(()) /// # } /// ``` #[inline] -pub fn line_suffix(inner: &Content) -> LineSuffix +pub fn line_suffix(inner: &Content, reserved_width: u32) -> LineSuffix where Content: Format, { LineSuffix { content: Argument::new(inner), + reserved_width, } } #[derive(Copy, Clone)] pub struct LineSuffix<'a, Context> { content: Argument<'a, Context>, + reserved_width: u32, } impl Format for LineSuffix<'_, Context> { fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - f.write_element(FormatElement::Tag(StartLineSuffix)); + f.write_element(FormatElement::Tag(StartLineSuffix { + reserved_width: self.reserved_width, + })); Arguments::from(&self.content).fmt(f)?; f.write_element(FormatElement::Tag(EndLineSuffix)); @@ -501,7 +533,7 @@ impl std::fmt::Debug for LineSuffix<'_, Context> { /// # fn main() -> FormatResult<()> { /// let elements = format!(SimpleFormatContext::default(), [ /// text("a"), -/// line_suffix(&text("c")), +/// line_suffix(&text("c"), 0), /// text("b"), /// line_suffix_boundary(), /// text("d") diff --git a/crates/ruff_formatter/src/format_element/document.rs b/crates/ruff_formatter/src/format_element/document.rs index 23b64645aa9c9..1d682218746da 100644 --- a/crates/ruff_formatter/src/format_element/document.rs +++ b/crates/ruff_formatter/src/format_element/document.rs @@ -459,8 +459,16 @@ impl Format> for &[FormatElement] { )?; } - StartLineSuffix => { - write!(f, [text("line_suffix(")])?; + StartLineSuffix { reserved_width } => { + write!( + f, + [ + text("line_suffix("), + dynamic_text(&std::format!("{reserved_width:?}"), None), + text(","), + space(), + ] + )?; } StartVerbatim(_) => { @@ -672,7 +680,9 @@ impl FormatElements for [FormatElement] { match element { // Line suffix // Ignore if any of its content breaks - FormatElement::Tag(Tag::StartLineSuffix | Tag::StartFitsExpanded(_)) => { + FormatElement::Tag( + Tag::StartLineSuffix { reserved_width: _ } | Tag::StartFitsExpanded(_), + ) => { ignore_depth += 1; } FormatElement::Tag(Tag::EndLineSuffix | Tag::EndFitsExpanded) => { diff --git a/crates/ruff_formatter/src/format_element/tag.rs b/crates/ruff_formatter/src/format_element/tag.rs index 91609b82c4800..da69013faa271 100644 --- a/crates/ruff_formatter/src/format_element/tag.rs +++ b/crates/ruff_formatter/src/format_element/tag.rs @@ -63,8 +63,11 @@ pub enum Tag { StartEntry, EndEntry, - /// Delay the printing of its content until the next line break - StartLineSuffix, + /// Delay the printing of its content until the next line break. Using reserved width will include + /// the associated line suffix during measurement. + StartLineSuffix { + reserved_width: u32, + }, EndLineSuffix, /// A token that tracks tokens/nodes that are printed as verbatim. @@ -96,7 +99,7 @@ impl Tag { | Tag::StartIndentIfGroupBreaks(_) | Tag::StartFill | Tag::StartEntry - | Tag::StartLineSuffix + | Tag::StartLineSuffix { reserved_width: _ } | Tag::StartVerbatim(_) | Tag::StartLabelled(_) | Tag::StartFitsExpanded(_) @@ -122,7 +125,7 @@ impl Tag { StartIndentIfGroupBreaks(_) | EndIndentIfGroupBreaks => TagKind::IndentIfGroupBreaks, StartFill | EndFill => TagKind::Fill, StartEntry | EndEntry => TagKind::Entry, - StartLineSuffix | EndLineSuffix => TagKind::LineSuffix, + StartLineSuffix { reserved_width: _ } | EndLineSuffix => TagKind::LineSuffix, StartVerbatim(_) | EndVerbatim => TagKind::Verbatim, StartLabelled(_) | EndLabelled => TagKind::Labelled, StartFitsExpanded { .. } | EndFitsExpanded => TagKind::FitsExpanded, diff --git a/crates/ruff_formatter/src/printer/mod.rs b/crates/ruff_formatter/src/printer/mod.rs index a096bd31fae89..57ceddca260f6 100644 --- a/crates/ruff_formatter/src/printer/mod.rs +++ b/crates/ruff_formatter/src/printer/mod.rs @@ -233,7 +233,8 @@ impl<'a> Printer<'a> { stack.push(TagKind::IndentIfGroupBreaks, args); } - FormatElement::Tag(StartLineSuffix) => { + FormatElement::Tag(StartLineSuffix { reserved_width }) => { + self.state.line_width += reserved_width; self.state .line_suffixes .extend(args, queue.iter_content(TagKind::LineSuffix)); @@ -1188,7 +1189,11 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> { } } - FormatElement::Tag(StartLineSuffix) => { + FormatElement::Tag(StartLineSuffix { reserved_width }) => { + self.state.line_width += reserved_width; + if self.state.line_width > self.options().print_width.into() { + return Ok(Fits::No); + } self.queue.skip_content(TagKind::LineSuffix); self.state.has_line_suffix = true; } @@ -1724,12 +1729,42 @@ two lines`, text("]") ]), text(";"), - line_suffix(&format_args![space(), text("// trailing")]) + line_suffix(&format_args![space(), text("// trailing")], 0) ]); assert_eq!(printed.as_code(), "[1, 2, 3]; // trailing"); } + #[test] + fn line_suffix_with_reserved_width() { + let printed = format(&format_args![ + group(&format_args![ + text("["), + soft_block_indent(&format_with(|f| { + f.fill() + .entry( + &soft_line_break_or_space(), + &format_args!(text("1"), text(",")), + ) + .entry( + &soft_line_break_or_space(), + &format_args!(text("2"), text(",")), + ) + .entry( + &soft_line_break_or_space(), + &format_args!(text("3"), if_group_breaks(&text(","))), + ) + .finish() + })), + text("]") + ]), + text(";"), + line_suffix(&format_args![space(), text("// Using reserved width causes this content to not fit even though it's a line suffix element")], 93) + ]); + + assert_eq!(printed.as_code(), "[\n 1, 2, 3\n]; // Using reserved width causes this content to not fit even though it's a line suffix element"); + } + #[test] fn conditional_with_group_id_in_fits() { let content = format_with(|f| { diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py index 9c6c5a6d5b113..f1078072139cc 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py @@ -66,3 +66,5 @@ h1 = ((((1, 2)))) h2 = ((((1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq")))) h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq" + +i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 7a36a3a406c2a..09c1c20b48539 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -1,9 +1,10 @@ use ruff_python_ast::Ranged; use ruff_text_size::{TextLen, TextRange, TextSize}; -use ruff_formatter::{format_args, write, FormatError, SourceCode}; +use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode, TabWidth}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; +use unicode_width::UnicodeWidthChar; use crate::comments::{CommentLinePosition, SourceComment}; use crate::context::NodeLevel; @@ -151,18 +152,33 @@ impl Format> for FormatTrailingComments<'_> { write!( f, [ - line_suffix(&format_args![ - empty_lines(lines_before_comment), - format_comment(trailing) - ]), + line_suffix( + &format_args![ + empty_lines(lines_before_comment), + format_comment(trailing) + ], + 0 // Reserving width isn't necessary because we don't split comments and the empty lines expand any enclosing group. + ), expand_parent() ] )?; } else { + // A trailing comment at the end of a line has a reserved width to consider during line measurement. + // ```python + // tup = ( + // "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + // ) # Some comment + // ``` write!( f, [ - line_suffix(&format_args![space(), space(), format_comment(trailing)]), + line_suffix( + &format_args![space(), space(), format_comment(trailing)], + measure_text( + &f.context().source()[slice.range()], + f.options().tab_width() + ) + 2 // Account for two added spaces + ), expand_parent() ] )?; @@ -264,10 +280,18 @@ impl Format> for FormatDanglingOpenParenthesisComments<'_> { "Expected dangling comment to be at the end of the line" ); + let slice = comment.slice(); write!( f, [ - line_suffix(&format_args!(space(), space(), format_comment(comment))), + line_suffix( + &format_args!(space(), space(), format_comment(comment)), + // Marking the comment as a line suffix with reserved width is safe since we expect the comment to be end of line. + measure_text( + &f.context().source()[slice.range()], + f.options().tab_width() + ) + 2 // Account for two added spaces + ), expand_parent() ] )?; @@ -374,3 +398,18 @@ impl Format> for FormatEmptyLines { } } } + +/// Helper for measuring text. +fn measure_text(text: &str, tab_width: TabWidth) -> u32 { + let mut width = 0; + for c in text.chars() { + let char_width = match c { + '\t' => tab_width.value(), + // SAFETY: A u32 is sufficient to format files <= 4GB + #[allow(clippy::cast_possible_truncation)] + c => c.width().unwrap_or(0) as u32, + }; + width += char_width; + } + width +} diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index 52255ec4f4611..363eff9501d30 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -247,10 +247,13 @@ impl Format> for MaybeParenthesizeExpression<'_> { if format_expression.inspect(f)?.will_break() { // The group here is necessary because `format_expression` may contain IR elements // that refer to the group id - group(&format_expression) - .with_group_id(Some(group_id)) - .should_expand(true) - .fmt(f) + group(&format_args![ + text("("), + soft_block_indent(&format_expression), + text(")") + ]) + .with_group_id(Some(group_id)) + .fmt(f) } else { // Only add parentheses if it makes the expression fit on the line. // Using the flat version as the most expanded version gives a left-to-right splitting behavior diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap index 6725d8d840c1f..f48ec014b3d7c 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap @@ -156,7 +156,7 @@ aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*ite ) -@@ -108,11 +112,18 @@ +@@ -108,11 +112,20 @@ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" ) @@ -176,7 +176,10 @@ aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*ite + ], # type: ignore ) - aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items))) # type: ignore[arg-type] +-aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items))) # type: ignore[arg-type] ++aaaaaaaaaaaaa, bbbbbbbbb = map( ++ list, map(itertools.chain.from_iterable, zip(*items)) ++) # type: ignore[arg-type] ``` ## Ruff Output @@ -310,7 +313,9 @@ call_to_some_function_asdf( ], # type: ignore ) -aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items))) # type: ignore[arg-type] +aaaaaaaaaaaaa, bbbbbbbbb = map( + list, map(itertools.chain.from_iterable, zip(*items)) +) # type: ignore[arg-type] ``` ## Black Output diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap index 86a5b37df9883..e55f593032574 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap @@ -300,7 +300,18 @@ last_call() ) # note: no trailing comma pre-3.6 call(*gidgets[:2]) call(a, *gidgets[:2]) -@@ -328,13 +329,18 @@ +@@ -142,7 +143,9 @@ + xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( # type: ignore + sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__) + ) +-xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( # type: ignore ++xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[ ++ ..., List[SomeClass] ++] = classmethod( # type: ignore + sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__) + ) + xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( +@@ -328,13 +331,18 @@ ): return True if ( @@ -322,7 +333,7 @@ last_call() ^ aaaaaaaa.i << aaaaaaaa.k >> aaaaaaaa.l**aaaaaaaa.m // aaaaaaaa.n ): return True -@@ -342,7 +348,8 @@ +@@ -342,7 +350,8 @@ ~aaaaaaaaaaaaaaaa.a + aaaaaaaaaaaaaaaa.b - aaaaaaaaaaaaaaaa.c * aaaaaaaaaaaaaaaa.d @ aaaaaaaaaaaaaaaa.e @@ -482,7 +493,9 @@ very_long_variable_name_filters: t.List[ xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( # type: ignore sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__) ) -xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( # type: ignore +xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[ + ..., List[SomeClass] +] = classmethod( # type: ignore sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__) ) xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod( diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__power_op_spacing.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__power_op_spacing.py.snap new file mode 100644 index 0000000000000..de37c7048668d --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__power_op_spacing.py.snap @@ -0,0 +1,232 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/power_op_spacing.py +--- +## Input + +```py +def function(**kwargs): + t = a**2 + b**3 + return t ** 2 + + +def function_replace_spaces(**kwargs): + t = a **2 + b** 3 + c ** 4 + + +def function_dont_replace_spaces(): + {**a, **b, **c} + + +a = 5**~4 +b = 5 ** f() +c = -(5**2) +d = 5 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5 +g = a.b**c.d +h = 5 ** funcs.f() +i = funcs.f() ** 5 +j = super().name ** 5 +k = [(2**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2**63], [1, 2**63])] +n = count <= 10**5 +o = settings(max_examples=10**6) +p = {(k, k**2): v**2 for k, v in pairs} +q = [10**i for i in range(6)] +r = x**y + +a = 5.0**~4.0 +b = 5.0 ** f() +c = -(5.0**2.0) +d = 5.0 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5.0 +g = a.b**c.d +h = 5.0 ** funcs.f() +i = funcs.f() ** 5.0 +j = super().name ** 5.0 +k = [(2.0**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2.0**63.0], [1.0, 2**63.0])] +n = count <= 10**5.0 +o = settings(max_examples=10**6.0) +p = {(k, k**2): v**2.0 for k, v in pairs} +q = [10.5**i for i in range(6)] + + +# WE SHOULD DEFINITELY NOT EAT THESE COMMENTS (https://github.com/psf/black/issues/2873) +if hasattr(view, "sum_of_weights"): + return np.divide( # type: ignore[no-any-return] + view.variance, # type: ignore[union-attr] + view.sum_of_weights, # type: ignore[union-attr] + out=np.full(view.sum_of_weights.shape, np.nan), # type: ignore[union-attr] + where=view.sum_of_weights**2 > view.sum_of_weights_squared, # type: ignore[union-attr] + ) + +return np.divide( + where=view.sum_of_weights_of_weight_long**2 > view.sum_of_weights_squared, # type: ignore +) +``` + +## Black Differences + +```diff +--- Black ++++ Ruff +@@ -55,9 +55,11 @@ + view.variance, # type: ignore[union-attr] + view.sum_of_weights, # type: ignore[union-attr] + out=np.full(view.sum_of_weights.shape, np.nan), # type: ignore[union-attr] +- where=view.sum_of_weights**2 > view.sum_of_weights_squared, # type: ignore[union-attr] ++ where=view.sum_of_weights**2 ++ > view.sum_of_weights_squared, # type: ignore[union-attr] + ) + + return np.divide( +- where=view.sum_of_weights_of_weight_long**2 > view.sum_of_weights_squared, # type: ignore ++ where=view.sum_of_weights_of_weight_long**2 ++ > view.sum_of_weights_squared, # type: ignore + ) +``` + +## Ruff Output + +```py +def function(**kwargs): + t = a**2 + b**3 + return t**2 + + +def function_replace_spaces(**kwargs): + t = a**2 + b**3 + c**4 + + +def function_dont_replace_spaces(): + {**a, **b, **c} + + +a = 5**~4 +b = 5 ** f() +c = -(5**2) +d = 5 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5 +g = a.b**c.d +h = 5 ** funcs.f() +i = funcs.f() ** 5 +j = super().name ** 5 +k = [(2**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2**63], [1, 2**63])] +n = count <= 10**5 +o = settings(max_examples=10**6) +p = {(k, k**2): v**2 for k, v in pairs} +q = [10**i for i in range(6)] +r = x**y + +a = 5.0**~4.0 +b = 5.0 ** f() +c = -(5.0**2.0) +d = 5.0 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5.0 +g = a.b**c.d +h = 5.0 ** funcs.f() +i = funcs.f() ** 5.0 +j = super().name ** 5.0 +k = [(2.0**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2.0**63.0], [1.0, 2**63.0])] +n = count <= 10**5.0 +o = settings(max_examples=10**6.0) +p = {(k, k**2): v**2.0 for k, v in pairs} +q = [10.5**i for i in range(6)] + + +# WE SHOULD DEFINITELY NOT EAT THESE COMMENTS (https://github.com/psf/black/issues/2873) +if hasattr(view, "sum_of_weights"): + return np.divide( # type: ignore[no-any-return] + view.variance, # type: ignore[union-attr] + view.sum_of_weights, # type: ignore[union-attr] + out=np.full(view.sum_of_weights.shape, np.nan), # type: ignore[union-attr] + where=view.sum_of_weights**2 + > view.sum_of_weights_squared, # type: ignore[union-attr] + ) + +return np.divide( + where=view.sum_of_weights_of_weight_long**2 + > view.sum_of_weights_squared, # type: ignore +) +``` + +## Black Output + +```py +def function(**kwargs): + t = a**2 + b**3 + return t**2 + + +def function_replace_spaces(**kwargs): + t = a**2 + b**3 + c**4 + + +def function_dont_replace_spaces(): + {**a, **b, **c} + + +a = 5**~4 +b = 5 ** f() +c = -(5**2) +d = 5 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5 +g = a.b**c.d +h = 5 ** funcs.f() +i = funcs.f() ** 5 +j = super().name ** 5 +k = [(2**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2**63], [1, 2**63])] +n = count <= 10**5 +o = settings(max_examples=10**6) +p = {(k, k**2): v**2 for k, v in pairs} +q = [10**i for i in range(6)] +r = x**y + +a = 5.0**~4.0 +b = 5.0 ** f() +c = -(5.0**2.0) +d = 5.0 ** f["hi"] +e = lazy(lambda **kwargs: 5) +f = f() ** 5.0 +g = a.b**c.d +h = 5.0 ** funcs.f() +i = funcs.f() ** 5.0 +j = super().name ** 5.0 +k = [(2.0**idx, value) for idx, value in pairs] +l = mod.weights_[0] == pytest.approx(0.95**100, abs=0.001) +m = [([2.0**63.0], [1.0, 2**63.0])] +n = count <= 10**5.0 +o = settings(max_examples=10**6.0) +p = {(k, k**2): v**2.0 for k, v in pairs} +q = [10.5**i for i in range(6)] + + +# WE SHOULD DEFINITELY NOT EAT THESE COMMENTS (https://github.com/psf/black/issues/2873) +if hasattr(view, "sum_of_weights"): + return np.divide( # type: ignore[no-any-return] + view.variance, # type: ignore[union-attr] + view.sum_of_weights, # type: ignore[union-attr] + out=np.full(view.sum_of_weights.shape, np.nan), # type: ignore[union-attr] + where=view.sum_of_weights**2 > view.sum_of_weights_squared, # type: ignore[union-attr] + ) + +return np.divide( + where=view.sum_of_weights_of_weight_long**2 > view.sum_of_weights_squared, # type: ignore +) +``` + + diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__remove_await_parens.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__remove_await_parens.py.snap index 428ca61376081..628dbb5c6a048 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__remove_await_parens.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__remove_await_parens.py.snap @@ -93,7 +93,7 @@ async def main(): ```diff --- Black +++ Ruff -@@ -21,7 +21,9 @@ +@@ -21,11 +21,15 @@ # Check comments async def main(): @@ -103,6 +103,13 @@ async def main(): + ) + async def main(): +- await asyncio.sleep(1) # Hello ++ await ( ++ asyncio.sleep(1) # Hello ++ ) + + async def main(): ``` @@ -138,7 +145,9 @@ async def main(): async def main(): - await asyncio.sleep(1) # Hello + await ( + asyncio.sleep(1) # Hello + ) async def main(): diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__torture.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__torture.py.snap index f44e1053f0b4b..3a57c5b7a90ff 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__torture.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__torture.py.snap @@ -50,14 +50,17 @@ assert ( ) # assert sort_by_dependency( -@@ -25,9 +25,9 @@ +@@ -25,9 +25,11 @@ class A: def foo(self): for _ in range(10): - aaaaaaaaaaaaaaaaaaa = bbbbbbbbbbbbbbb.cccccccccc( -+ aaaaaaaaaaaaaaaaaaa = bbbbbbbbbbbbbbb.cccccccccc( # pylint: disable=no-member - xxxxxxxxxxxx +- xxxxxxxxxxxx - ) # pylint: disable=no-member ++ aaaaaaaaaaaaaaaaaaa = ( ++ bbbbbbbbbbbbbbb.cccccccccc( # pylint: disable=no-member ++ xxxxxxxxxxxx ++ ) + ) @@ -94,8 +97,10 @@ importA class A: def foo(self): for _ in range(10): - aaaaaaaaaaaaaaaaaaa = bbbbbbbbbbbbbbb.cccccccccc( # pylint: disable=no-member - xxxxxxxxxxxx + aaaaaaaaaaaaaaaaaaa = ( + bbbbbbbbbbbbbbb.cccccccccc( # pylint: disable=no-member + xxxxxxxxxxxx + ) ) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap index 85dfa81fff34f..bb1b2aebaf01a 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap @@ -72,6 +72,8 @@ g2 = ( # a h1 = ((((1, 2)))) h2 = ((((1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq")))) h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq" + +i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break ``` ## Output @@ -275,6 +277,10 @@ h3 = ( 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq", ) + +i1 = ( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", +) # This should break ``` diff --git a/crates/ruff_python_formatter/tests/snapshots/format@parentheses__call_chains.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@parentheses__call_chains.py.snap index 8c8c952a79d7f..1bd2089f6b76f 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@parentheses__call_chains.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@parentheses__call_chains.py.snap @@ -268,7 +268,9 @@ d13 = ( ) # Doesn't fit, default -d2 = x.e().esadjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkfsdddd() # +d2 = ( + x.e().esadjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkfsdddd() # +) # Doesn't fit, fluent style d3 = ( diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__delete.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__delete.py.snap index ce4add0b9462c..2afea0bb26dd2 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__delete.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__delete.py.snap @@ -204,7 +204,13 @@ del ( # NOTE: This shouldn't format. See https://github.com/astral-sh/ruff/issues/5630. # Delete something -del x, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, b, c, d # Delete these +del ( + x, + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, + b, + c, + d, +) # Delete these # Ready to delete # Delete something diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__try.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__try.py.snap index 589d8a25e754c..e4fe04e4348cb 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__try.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__try.py.snap @@ -324,7 +324,9 @@ finally: try: # 1 preceding: any, following: first in body, enclosing: try print(1) # 2 preceding: last in body, following: fist in alt body, enclosing: try -except ZeroDivisionError: # 3 preceding: test, following: fist in alt body, enclosing: try +except ( + ZeroDivisionError +): # 3 preceding: test, following: fist in alt body, enclosing: try print(2) # 4 preceding: last in body, following: fist in alt body, enclosing: exc except: # 5 preceding: last in body, following: fist in alt body, enclosing: try print(2) # 6 preceding: last in body, following: fist in alt body, enclosing: exc diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__while.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__while.py.snap index fdc99df9a6526..9b09af15dbb59 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__while.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__while.py.snap @@ -57,7 +57,9 @@ while aVeryLongConditionThatSpillsOverToTheNextLineBecauseItIsExtremelyLongAndGo else: ... -while some_condition(unformatted, args) and anotherCondition or aThirdCondition: # comment +while ( + some_condition(unformatted, args) and anotherCondition or aThirdCondition +): # comment print("Do something") From a4d21fdcdc80d0409ee5931ea753ae9e2e603886 Mon Sep 17 00:00:00 2001 From: Chris Pryer Date: Sun, 27 Aug 2023 13:49:51 -0400 Subject: [PATCH 2/7] Replace `measure_text` with `normalize_comment` helper --- .../src/comments/format.rs | 74 +++++++++++++------ 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 09c1c20b48539..11186214c91b1 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -1,10 +1,11 @@ +use std::borrow::Cow; + use ruff_python_ast::Ranged; use ruff_text_size::{TextLen, TextRange, TextSize}; -use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode, TabWidth}; +use ruff_formatter::{format_args, write, FormatError, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; -use unicode_width::UnicodeWidthChar; use crate::comments::{CommentLinePosition, SourceComment}; use crate::context::NodeLevel; @@ -169,15 +170,16 @@ impl Format> for FormatTrailingComments<'_> { // "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // ) # Some comment // ``` + let source = SourceCode::new(f.context().source()); + // SAFETY: Ruff only supports formatting files <= 4GB + #[allow(clippy::cast_possible_truncation)] + let comment_len = normalize_comment(trailing, source).len() as u32; write!( f, [ line_suffix( &format_args![space(), space(), format_comment(trailing)], - measure_text( - &f.context().source()[slice.range()], - f.options().tab_width() - ) + 2 // Account for two added spaces + comment_len + 2 // Account for two added spaces ), expand_parent() ] @@ -280,17 +282,17 @@ impl Format> for FormatDanglingOpenParenthesisComments<'_> { "Expected dangling comment to be at the end of the line" ); - let slice = comment.slice(); + let source = SourceCode::new(f.context().source()); + // SAFETY: Ruff only supports formatting files <= 4GB + #[allow(clippy::cast_possible_truncation)] + let comment_len = normalize_comment(comment, source).len() as u32; write!( f, [ line_suffix( &format_args!(space(), space(), format_comment(comment)), // Marking the comment as a line suffix with reserved width is safe since we expect the comment to be end of line. - measure_text( - &f.context().source()[slice.range()], - f.options().tab_width() - ) + 2 // Account for two added spaces + comment_len + 2 // Account for two added spaces ), expand_parent() ] @@ -399,17 +401,43 @@ impl Format> for FormatEmptyLines { } } -/// Helper for measuring text. -fn measure_text(text: &str, tab_width: TabWidth) -> u32 { - let mut width = 0; - for c in text.chars() { - let char_width = match c { - '\t' => tab_width.value(), - // SAFETY: A u32 is sufficient to format files <= 4GB - #[allow(clippy::cast_possible_truncation)] - c => c.width().unwrap_or(0) as u32, - }; - width += char_width; +// TODO(cnpryer): Alternatively could just duplicate the range tracking from normalization instead +// of costly String operations and allocations. +// Could abstract the comment behind `NormalizedComment` +fn normalize_comment<'a>(comment: &'a SourceComment, source: SourceCode<'a>) -> Cow<'a, str> { + let slice = comment.slice(); + let comment_text = slice.text(source); + + // If the comment isn't leading with '#' it's invalid, so we return the text as-is borrowed. + // TODO(cnpryer): If this is always used alongside `.fmt()` on comments this should be safe, but + // we can probably do better here. + let Some(content) = comment_text.strip_prefix('#') else { + return Cow::Borrowed(comment_text); + }; + + // Make the content mutable in case we need to normalize it. Any normalization is done with lazy operations. + let mut content = Cow::Borrowed(content); + + // If the comment has trailing whitespace then we take ownership of a string to make mutations to + let trimmed = content.trim_end(); + let trailing_whitespace_len = content.text_len() - trimmed.text_len(); + if trailing_whitespace_len > TextSize::new(0) { + content = Cow::Owned(trimmed.to_owned()); + } + + // Fast path for correctly formatted comments: + // * Start with a `# '. + // * Have no trailing whitespace. + if trailing_whitespace_len == TextSize::new(0) && content.starts_with(' ') { + return Cow::Borrowed(comment_text); + } + + // Formatted comments start with '# '. We perform this normalization if it's necessary. + if !content.is_empty() && !content.starts_with([' ', '!', ':', '#', '\'', '\u{A0}']) { + Cow::Owned(std::format!("# {content}")) + } else if trailing_whitespace_len > TextSize::new(0) { + Cow::Owned(std::format!("#{content}")) + } else { + Cow::Borrowed(comment_text) } - width } From 3186ff15c960668e2f25ae15e8027b1596df99c2 Mon Sep 17 00:00:00 2001 From: Chris Pryer Date: Sun, 27 Aug 2023 15:58:17 -0400 Subject: [PATCH 3/7] Replace `normalize_comment` with `write_comment` strategy --- .../src/comments/format.rs | 146 +++++++----------- 1 file changed, 60 insertions(+), 86 deletions(-) diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 069855708d570..26efa692864b8 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -1,8 +1,6 @@ -use std::borrow::Cow; - use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; -use ruff_formatter::{format_args, write, FormatError, SourceCode}; +use ruff_formatter::{format_args, write, FormatError, FormatState, SourceCode, VecBuffer}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; @@ -169,16 +167,12 @@ impl Format> for FormatTrailingComments<'_> { // "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // ) # Some comment // ``` - let source = SourceCode::new(f.context().source()); - // SAFETY: Ruff only supports formatting files <= 4GB - #[allow(clippy::cast_possible_truncation)] - let comment_len = normalize_comment(trailing, source).len() as u32; write!( f, [ line_suffix( &format_args![space(), space(), format_comment(trailing)], - comment_len + 2 // Account for two added spaces + measure_comment(trailing, f.context())? + 2 // Account for two added spaces ), expand_parent() ] @@ -281,17 +275,13 @@ impl Format> for FormatDanglingOpenParenthesisComments<'_> { "Expected dangling comment to be at the end of the line" ); - let source = SourceCode::new(f.context().source()); - // SAFETY: Ruff only supports formatting files <= 4GB - #[allow(clippy::cast_possible_truncation)] - let comment_len = normalize_comment(comment, source).len() as u32; write!( f, [ line_suffix( &format_args!(space(), space(), format_comment(comment)), // Marking the comment as a line suffix with reserved width is safe since we expect the comment to be end of line. - comment_len + 2 // Account for two added spaces + measure_comment(comment, f.context())? + 2 // Account for two added spaces ), expand_parent() ] @@ -317,51 +307,9 @@ pub(crate) struct FormatComment<'a> { impl Format> for FormatComment<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - let slice = self.comment.slice(); - let comment_text = slice.text(SourceCode::new(f.context().source())); - - let trimmed = comment_text.trim_end(); - let trailing_whitespace_len = comment_text.text_len() - trimmed.text_len(); - - let Some(content) = trimmed.strip_prefix('#') else { - return Err(FormatError::syntax_error( - "Didn't find expected comment token `#`", - )); - }; - - // Fast path for correctly formatted comments: - // * Start with a `#` and are followed by a space - // * Have no trailing whitespace. - if trailing_whitespace_len == TextSize::new(0) && content.starts_with(' ') { - return source_text_slice(slice.range(), ContainsNewlines::No).fmt(f); - } - - write!(f, [source_position(slice.start()), text("#")])?; - - // Starts with a non breaking space - let start_offset = - if content.starts_with('\u{A0}') && !content.trim_start().starts_with("type:") { - // Replace non-breaking space with a space (if not followed by a normal space) - "#\u{A0}".text_len() - } else { - '#'.text_len() - }; - - // Add a space between the `#` and the text if the source contains none. - if !content.is_empty() && !content.starts_with([' ', '!', ':', '#', '\'']) { - write!(f, [space()])?; - } - - let start = slice.start() + start_offset; - let end = slice.end() - trailing_whitespace_len; - - write!( - f, - [ - source_text_slice(TextRange::new(start, end), ContainsNewlines::No), - source_position(slice.end()) - ] - ) + // We don't need the formatted comment's width. + let _ = write_comment(f, self.comment)?; + Ok(()) } } @@ -400,43 +348,69 @@ impl Format> for FormatEmptyLines { } } -// TODO(cnpryer): Alternatively could just duplicate the range tracking from normalization instead -// of costly String operations and allocations. -// Could abstract the comment behind `NormalizedComment` -fn normalize_comment<'a>(comment: &'a SourceComment, source: SourceCode<'a>) -> Cow<'a, str> { +/// A helper used to measure formatted comments. +/// +/// Use a temporary formatter to write a normalized, formatted comment +/// to in order to compute its width for a reserved-width line suffix element. +fn measure_comment(comment: &SourceComment, context: &PyFormatContext) -> FormatResult { + let mut state = FormatState::new(context.clone()); + let mut buffer = VecBuffer::new(&mut state); + let comment_len = write_comment(&mut Formatter::new(&mut buffer), comment)?; + Ok(comment_len) +} + +/// Write a comment to a formatter and return the normalized comment's width. +fn write_comment(f: &mut PyFormatter, comment: &SourceComment) -> FormatResult { let slice = comment.slice(); - let comment_text = slice.text(source); + let comment_text = slice.text(SourceCode::new(f.context().source())); - // If the comment isn't leading with '#' it's invalid, so we return the text as-is borrowed. - // TODO(cnpryer): If this is always used alongside `.fmt()` on comments this should be safe, but - // we can probably do better here. - let Some(content) = comment_text.strip_prefix('#') else { - return Cow::Borrowed(comment_text); - }; + // Track any additional width the formatted comment will have after normalization. + let mut added_width = TextSize::new(0); - // Make the content mutable in case we need to normalize it. Any normalization is done with lazy operations. - let mut content = Cow::Borrowed(content); + let trimmed = comment_text.trim_end(); + let trailing_whitespace_len = comment_text.text_len() - trimmed.text_len(); - // If the comment has trailing whitespace then we take ownership of a string to make mutations to - let trimmed = content.trim_end(); - let trailing_whitespace_len = content.text_len() - trimmed.text_len(); - if trailing_whitespace_len > TextSize::new(0) { - content = Cow::Owned(trimmed.to_owned()); - } + let Some(content) = trimmed.strip_prefix('#') else { + return Err(FormatError::syntax_error( + "Didn't find expected comment token `#`", + )); + }; // Fast path for correctly formatted comments: - // * Start with a `# '. + // * Start with a `#` and are followed by a space // * Have no trailing whitespace. if trailing_whitespace_len == TextSize::new(0) && content.starts_with(' ') { - return Cow::Borrowed(comment_text); + source_text_slice(slice.range(), ContainsNewlines::No).fmt(f)?; + return Ok(slice.range().len().into()); } - // Formatted comments start with '# '. We perform this normalization if it's necessary. - if !content.is_empty() && !content.starts_with([' ', '!', ':', '#', '\'', '\u{A0}']) { - Cow::Owned(std::format!("# {content}")) - } else if trailing_whitespace_len > TextSize::new(0) { - Cow::Owned(std::format!("#{content}")) - } else { - Cow::Borrowed(comment_text) + write!(f, [source_position(slice.start()), text("#")])?; + + // Starts with a non breaking space + let start_offset = + if content.starts_with('\u{A0}') && !content.trim_start().starts_with("type:") { + // Replace non-breaking space with a space (if not followed by a normal space) + "#\u{A0}".text_len() + } else { + '#'.text_len() + }; + + // Add a space between the `#` and the text if the source contains none. + if !content.is_empty() && !content.starts_with([' ', '!', ':', '#', '\'']) { + write!(f, [space()])?; + added_width += TextSize::new(1); } + + let start = slice.start() + start_offset; + let end = slice.end() - trailing_whitespace_len; + + write!( + f, + [ + source_text_slice(TextRange::new(start, end), ContainsNewlines::No), + source_position(slice.end()) + ] + )?; + + Ok((end - slice.start() + added_width).into()) } From ba678b1ca1f57303f5a43fc02a698d4d2302944b Mon Sep 17 00:00:00 2001 From: Chris Pryer Date: Mon, 28 Aug 2023 13:16:34 -0400 Subject: [PATCH 4/7] Use `normalize_comment` --- .../test/fixtures/ruff/expression/tuple.py | 7 + .../src/comments/format.rs | 190 +++++++++++------- .../format@expression__tuple.py.snap | 14 ++ 3 files changed, 143 insertions(+), 68 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py index f1078072139cc..94e9ae397a9bc 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py @@ -68,3 +68,10 @@ h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq" i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break + +# As of adding this fixture `black` replaces the non-breaking space with a space if followed by a space. +# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 +i2 = ("",) #  type: Add space before leading NBSP followed by spaces +i3 = ("",) #type: A space is added +i4 = ("",) #  type: Add space before leading NBSP followed by a space +i5 = ("",) # type: Add space before leading NBSP diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 26efa692864b8..4a9d65346a06f 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -1,6 +1,8 @@ +use std::borrow::Cow; + use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; -use ruff_formatter::{format_args, write, FormatError, FormatState, SourceCode, VecBuffer}; +use ruff_formatter::{format_args, write, FormatError, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; @@ -155,28 +157,22 @@ impl Format> for FormatTrailingComments<'_> { empty_lines(lines_before_comment), format_comment(trailing) ], - 0 // Reserving width isn't necessary because we don't split comments and the empty lines expand any enclosing group. + // Reserving width isn't necessary because we don't split + // comments and the empty lines expand any enclosing group. + 0 ), expand_parent() ] )?; } else { - // A trailing comment at the end of a line has a reserved width to consider during line measurement. + // A trailing comment at the end of a line has a reserved width to + // consider during line measurement. // ```python // tup = ( // "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // ) # Some comment // ``` - write!( - f, - [ - line_suffix( - &format_args![space(), space(), format_comment(trailing)], - measure_comment(trailing, f.context())? + 2 // Account for two added spaces - ), - expand_parent() - ] - )?; + trailing_end_of_line_comment(trailing).fmt(f)?; } trailing.mark_formatted(); @@ -275,17 +271,7 @@ impl Format> for FormatDanglingOpenParenthesisComments<'_> { "Expected dangling comment to be at the end of the line" ); - write!( - f, - [ - line_suffix( - &format_args!(space(), space(), format_comment(comment)), - // Marking the comment as a line suffix with reserved width is safe since we expect the comment to be end of line. - measure_comment(comment, f.context())? + 2 // Account for two added spaces - ), - expand_parent() - ] - )?; + trailing_end_of_line_comment(comment).fmt(f)?; comment.mark_formatted(); } @@ -307,9 +293,12 @@ pub(crate) struct FormatComment<'a> { impl Format> for FormatComment<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - // We don't need the formatted comment's width. - let _ = write_comment(f, self.comment)?; - Ok(()) + let slice = self.comment.slice(); + let source = SourceCode::new(f.context().source()); + + let normalized_comment = normalize_comment(self.comment, source)?; + + format_normalized_comment(normalized_comment, slice.start()).fmt(f) } } @@ -348,24 +337,95 @@ impl Format> for FormatEmptyLines { } } -/// A helper used to measure formatted comments. +/// A helper that constructs a formattable element using a reserved-width line-suffix +/// for normalized comments. /// -/// Use a temporary formatter to write a normalized, formatted comment -/// to in order to compute its width for a reserved-width line suffix element. -fn measure_comment(comment: &SourceComment, context: &PyFormatContext) -> FormatResult { - let mut state = FormatState::new(context.clone()); - let mut buffer = VecBuffer::new(&mut state); - let comment_len = write_comment(&mut Formatter::new(&mut buffer), comment)?; - Ok(comment_len) +/// * Black normalization of `SourceComment`. +/// * Line suffix with reserved width for the final, normalized content. +/// * Expands parent node. +pub(crate) const fn trailing_end_of_line_comment( + comment: &SourceComment, +) -> FormatTrailingEndOfLineComment { + FormatTrailingEndOfLineComment { comment } } -/// Write a comment to a formatter and return the normalized comment's width. -fn write_comment(f: &mut PyFormatter, comment: &SourceComment) -> FormatResult { - let slice = comment.slice(); - let comment_text = slice.text(SourceCode::new(f.context().source())); +pub(crate) struct FormatTrailingEndOfLineComment<'a> { + comment: &'a SourceComment, +} - // Track any additional width the formatted comment will have after normalization. - let mut added_width = TextSize::new(0); +impl Format> for FormatTrailingEndOfLineComment<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let slice = self.comment.slice(); + let source = SourceCode::new(f.context().source()); + + let normalized_comment = normalize_comment(self.comment, source)?; + let reserved_width = normalized_comment.text_len().to_u32() + 2; // Account for two added spaces + + write!( + f, + [ + line_suffix( + &format_args![ + space(), + space(), + format_normalized_comment(normalized_comment, slice.start()) + ], + reserved_width + ), + expand_parent() + ] + ) + } +} + +/// A helper that constructs formattable normalized comment text as efficiently as +/// possible. +/// +/// * If the content is unaltered then format with source text slice strategy and no +/// unnecessary allocations. +/// * If the content is modified then make as few allocations as possible and use +/// a dynamic text element at the original slice's start position. +pub(crate) const fn format_normalized_comment( + comment: Cow<'_, str>, + start_position: TextSize, +) -> FormatNormalizedComment<'_> { + FormatNormalizedComment { + comment, + start_position, + } +} + +pub(crate) struct FormatNormalizedComment<'a> { + comment: Cow<'a, str>, + start_position: TextSize, +} + +impl Format> for FormatNormalizedComment<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + match self.comment { + Cow::Borrowed(borrowed) => source_text_slice( + TextRange::at(self.start_position, borrowed.text_len()), + ContainsNewlines::No, + ) + .fmt(f), + + Cow::Owned(ref owned) => dynamic_text(owned, Some(self.start_position)).fmt(f), + } + } +} + +/// A helper for normalizing comments efficiently. +/// +/// * Return as fast as possible without making unnecessary allocations. +/// * Trim any trailing whitespace. +/// * Normalize for a leading '# '. +/// * Retain non-breaking spaces for 'type:' pragmas by leading with '# \u{A0}'. +fn normalize_comment<'a>( + comment: &'a SourceComment, + source: SourceCode<'a>, +) -> FormatResult> { + let slice = comment.slice(); + let comment_text = slice.text(source); let trimmed = comment_text.trim_end(); let trailing_whitespace_len = comment_text.text_len() - trimmed.text_len(); @@ -376,41 +436,35 @@ fn write_comment(f: &mut PyFormatter, comment: &SourceComment) -> FormatResult Date: Tue, 29 Aug 2023 18:14:37 -0400 Subject: [PATCH 5/7] Normalize comment in type: pragma --- crates/ruff_python_formatter/src/comments/format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 4a9d65346a06f..d2870e7db49f1 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -452,7 +452,7 @@ fn normalize_comment<'a>( // Black adds a space before the non-breaking space if part of a type pragma. if trimmed.trim_start().starts_with("type:") { - return Ok(Cow::Borrowed(comment_text)); + return Ok(Cow::Owned(std::format!("# \u{A0}{trimmed}"))); } // Black replaces the non-breaking space with a space if followed by a space. From 1745d3df3c27e14945766e5282900bc9d49870f1 Mon Sep 17 00:00:00 2001 From: Chris Pryer Date: Tue, 29 Aug 2023 19:02:59 -0400 Subject: [PATCH 6/7] Fix new fixtures comment --- .../resources/test/fixtures/ruff/expression/tuple.py | 2 +- .../tests/snapshots/format@expression__tuple.py.snap | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py index 94e9ae397a9bc..2aebe8989e3d9 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py @@ -69,7 +69,7 @@ i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break -# As of adding this fixture `black` replaces the non-breaking space with a space if followed by a space. +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. # https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 i2 = ("",) #  type: Add space before leading NBSP followed by spaces i3 = ("",) #type: A space is added diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap index 5f692c1a93f24..386418c094bb9 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap @@ -75,7 +75,7 @@ h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurp i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break -# As of adding this fixture `black` replaces the non-breaking space with a space if followed by a space. +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. # https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 i2 = ("",) #  type: Add space before leading NBSP followed by spaces i3 = ("",) #type: A space is added @@ -289,7 +289,7 @@ i1 = ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ) # This should break -# As of adding this fixture `black` replaces the non-breaking space with a space if followed by a space. +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. # https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 i2 = ("",) #   type: Add space before leading NBSP followed by spaces i3 = ("",) # type: A space is added From c2e6deaef8c7016c608128a9b9a94466fe086bea Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 30 Aug 2023 09:43:53 +0200 Subject: [PATCH 7/7] Use width instead of byte length, small normalize comment cleanups --- .../comments_non_breaking_space.py | 2 +- .../test/fixtures/ruff/expression/tuple.py | 6 -- .../test/fixtures/ruff/trailing_comments.py | 6 ++ .../src/comments/format.rs | 55 +++++++++++-------- .../format@expression__tuple.py.snap | 13 ----- .../format@trailing_comments.py.snap | 26 +++++++++ 6 files changed, 66 insertions(+), 42 deletions(-) create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py create mode 100644 crates/ruff_python_formatter/tests/snapshots/format@trailing_comments.py.snap diff --git a/crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py b/crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py index d1d42f025969c..0c5f5660ebba9 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py @@ -14,6 +14,6 @@ def function(a:int=42): a b """ - #  There's a NBSP + 3 spaces before + #    There's a NBSP + 3 spaces before # And 4 spaces on the next line pass diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py index 2aebe8989e3d9..af5273484d2d5 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py @@ -69,9 +69,3 @@ i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break -# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. -# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 -i2 = ("",) #  type: Add space before leading NBSP followed by spaces -i3 = ("",) #type: A space is added -i4 = ("",) #  type: Add space before leading NBSP followed by a space -i5 = ("",) # type: Add space before leading NBSP diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py new file mode 100644 index 0000000000000..5bfe148cf5f30 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py @@ -0,0 +1,6 @@ +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. +# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 +i2 = "" #  type: Add space before leading NBSP followed by spaces +i3 = "" #type: A space is added +i4 = "" #  type: Add space before leading NBSP followed by a space +i5 = "" # type: Add space before leading NBSP diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index c479d15a3c83e..7be01669e39ce 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -1,8 +1,9 @@ use std::borrow::Cow; +use unicode_width::UnicodeWidthChar; -use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextLen, TextRange}; -use ruff_formatter::{format_args, write, FormatError, SourceCode}; +use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; @@ -294,7 +295,7 @@ impl Format> for FormatComment<'_> { let normalized_comment = normalize_comment(self.comment, source)?; - format_normalized_comment(normalized_comment, slice.start()).fmt(f) + format_normalized_comment(normalized_comment, slice.range()).fmt(f) } } @@ -355,7 +356,18 @@ impl Format> for FormatTrailingEndOfLineComment<'_> { let source = SourceCode::new(f.context().source()); let normalized_comment = normalize_comment(self.comment, source)?; - let reserved_width = normalized_comment.text_len().to_u32() + 2; // Account for two added spaces + + // Start with 2 because of the two leading spaces. + let mut reserved_width = 2; + + // SAFE: The formatted file is <= 4GB, and each comment should as well. + #[allow(clippy::cast_possible_truncation)] + for c in normalized_comment.chars() { + reserved_width += match c { + '\t' => f.options().tab_width().value(), + c => c.width().unwrap_or(0) as u32, + } + } write!( f, @@ -364,7 +376,7 @@ impl Format> for FormatTrailingEndOfLineComment<'_> { &format_args![ space(), space(), - format_normalized_comment(normalized_comment, slice.start()) + format_normalized_comment(normalized_comment, slice.range()) ], reserved_width ), @@ -383,29 +395,34 @@ impl Format> for FormatTrailingEndOfLineComment<'_> { /// a dynamic text element at the original slice's start position. pub(crate) const fn format_normalized_comment( comment: Cow<'_, str>, - start_position: TextSize, + range: TextRange, ) -> FormatNormalizedComment<'_> { - FormatNormalizedComment { - comment, - start_position, - } + FormatNormalizedComment { comment, range } } pub(crate) struct FormatNormalizedComment<'a> { comment: Cow<'a, str>, - start_position: TextSize, + range: TextRange, } impl Format> for FormatNormalizedComment<'_> { fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { match self.comment { Cow::Borrowed(borrowed) => source_text_slice( - TextRange::at(self.start_position, borrowed.text_len()), + TextRange::at(self.range.start(), borrowed.text_len()), ContainsNewlines::No, ) .fmt(f), - Cow::Owned(ref owned) => dynamic_text(owned, Some(self.start_position)).fmt(f), + Cow::Owned(ref owned) => { + write!( + f, + [ + dynamic_text(owned, Some(self.range.start())), + source_position(self.range.end()) + ] + ) + } } } } @@ -424,7 +441,6 @@ fn normalize_comment<'a>( let comment_text = slice.text(source); let trimmed = comment_text.trim_end(); - let trailing_whitespace_len = comment_text.text_len() - trimmed.text_len(); let Some(content) = trimmed.strip_prefix('#') else { return Err(FormatError::syntax_error( @@ -439,8 +455,8 @@ fn normalize_comment<'a>( // Fast path for correctly formatted comments: // * Start with a `# '. // * Have no trailing whitespace. - if trailing_whitespace_len == TextSize::new(0) && content.starts_with(' ') { - return Ok(Cow::Borrowed(comment_text)); + if content.starts_with([' ', '!', ':', '#', '\'']) { + return Ok(Cow::Borrowed(trimmed)); } if content.starts_with('\u{A0}') { @@ -457,10 +473,5 @@ fn normalize_comment<'a>( } } - // We normalize the leading space if we can. - if !content.starts_with([' ', '!', ':', '#', '\'']) { - return Ok(Cow::Owned(std::format!("# {}", content.trim_start()))); - } - - Ok(Cow::Owned(std::format!("#{content}"))) + Ok(Cow::Owned(std::format!("# {}", content.trim_start()))) } diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap index 386418c094bb9..0f8997294a341 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__tuple.py.snap @@ -75,12 +75,6 @@ h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurp i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",) # This should break -# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. -# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 -i2 = ("",) #  type: Add space before leading NBSP followed by spaces -i3 = ("",) #type: A space is added -i4 = ("",) #  type: Add space before leading NBSP followed by a space -i5 = ("",) # type: Add space before leading NBSP ``` ## Output @@ -288,13 +282,6 @@ h3 = ( i1 = ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ) # This should break - -# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. -# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 -i2 = ("",) #   type: Add space before leading NBSP followed by spaces -i3 = ("",) # type: A space is added -i4 = ("",) #   type: Add space before leading NBSP followed by a space -i5 = ("",) #  type: Add space before leading NBSP ``` diff --git a/crates/ruff_python_formatter/tests/snapshots/format@trailing_comments.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@trailing_comments.py.snap new file mode 100644 index 0000000000000..8ddbd6b6f5e2a --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@trailing_comments.py.snap @@ -0,0 +1,26 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py +--- +## Input +```py +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. +# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 +i2 = "" #  type: Add space before leading NBSP followed by spaces +i3 = "" #type: A space is added +i4 = "" #  type: Add space before leading NBSP followed by a space +i5 = "" # type: Add space before leading NBSP +``` + +## Output +```py +# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma. +# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129 +i2 = "" #   type: Add space before leading NBSP followed by spaces +i3 = "" # type: A space is added +i4 = "" #   type: Add space before leading NBSP followed by a space +i5 = "" #  type: Add space before leading NBSP +``` + + +