Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update 'convert to raw string' to help cleanup strings with leading whitespace #60936

Merged
merged 7 commits into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/Features/CSharp/Portable/CSharpFeaturesResources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@
<data name="Convert_to_raw_string" xml:space="preserve">
<value>Convert to raw string</value>
</data>
<data name="Convert_to_raw_string_no_indent" xml:space="preserve">
<value>Convert to raw string (no indent)</value>
<data name="without_leading_whitespace_may_change_semantics" xml:space="preserve">
<value>... without leading whitespace (may change semantics)</value>
CyrusNajmabadi marked this conversation as resolved.
Show resolved Hide resolved
</data>
<data name="Apply_blank_lines_between_consecutive_braces_preferences_experimental" xml:space="preserve">
<value>Apply blank lines between consecutive braces preferences (experimental)</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,18 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Immutable;
using System.Composition;
using System.Linq;
using System.Reflection.PortableExecutable;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.CodeActions;
using Microsoft.CodeAnalysis.CodeRefactorings;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.Formatting;
using Microsoft.CodeAnalysis.Host.Mef;
using Microsoft.CodeAnalysis.Indentation;
Expand All @@ -27,8 +33,8 @@ internal class ConvertRegularStringToRawStringCodeRefactoringProvider : CodeRefa
private enum ConvertToRawKind
{
SingleLine,
MultiLine,
MultiLineIndented,
MultiLineWithoutLeadingWhitespace,
}

[ImportingConstructor]
Expand Down Expand Up @@ -85,9 +91,6 @@ public override async Task ComputeRefactoringsAsync(CodeRefactoringContext conte
}
else
{
var sourceText = await document.GetTextAsync(cancellationToken).ConfigureAwait(true);
sourceText.GetLineAndOffset(token.SpanStart, out _, out var lineOffset);

context.RegisterRefactoring(
new MyCodeAction(
CSharpFeaturesResources.Convert_to_raw_string,
Expand All @@ -96,19 +99,52 @@ public override async Task ComputeRefactoringsAsync(CodeRefactoringContext conte
priority),
token.Span);

if (lineOffset > 0)
// Users sometimes write verbatim string literals with a extra starting newline (or indentation) purely
// for aesthetic reasons. For example:
//
// var v = @"
// SELECT column1, column2, ...
// FROM table_name";
//
// Converting this directly to a raw string will produce:
//
// var v = """
//
// SELECT column1, column2, ...
// FROM table_name";
// """
//
// Check for this and offer instead to generate:
//
// var v = """
// SELECT column1, column2, ...
// FROM table_name";
// """
//
// This changes the contents of the literal, but that can be fine for the domain the user is working in.
// Offer this, but let the user know that this will change runtime semantics.
if (token.IsVerbatimStringLiteral() && HasLeadingWhitespace(characters))
{
context.RegisterRefactoring(
new MyCodeAction(
CSharpFeaturesResources.Convert_to_raw_string_no_indent,
c => UpdateDocumentAsync(document, span, ConvertToRawKind.MultiLine, formattingOptions, c),
nameof(CSharpFeaturesResources.Convert_to_raw_string_no_indent),
CSharpFeaturesResources.without_leading_whitespace_may_change_semantics,
c => UpdateDocumentAsync(document, span, ConvertToRawKind.MultiLineWithoutLeadingWhitespace, formattingOptions, c),
nameof(CSharpFeaturesResources.without_leading_whitespace_may_change_semantics),
priority),
token.Span);
}
}
}

private static bool HasLeadingWhitespace(VirtualCharSequence characters)
{
var index = 0;
while (index < characters.Length && IsCSharpWhitespace(characters[index]))
index++;

return index < characters.Length && IsCSharpNewLine(characters[index]);
}

private static async Task<Document> UpdateDocumentAsync(
Document document, TextSpan span, ConvertToRawKind kind, SyntaxFormattingOptions options, CancellationToken cancellationToken)
{
Expand All @@ -128,20 +164,143 @@ private static SyntaxToken GetReplacementToken(
SyntaxFormattingOptions options,
CancellationToken cancellationToken)
{
return kind switch
var characters = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token);
Contract.ThrowIfTrue(characters.IsDefaultOrEmpty);

// If the user asked to remove whitespace then do so now.
if (kind == ConvertToRawKind.MultiLineWithoutLeadingWhitespace)
characters = CleanupWhitespace(characters);

return kind == ConvertToRawKind.SingleLine
? ConvertToSingleLineRawString(token, characters)
: ConvertToMultiLineRawIndentedString(document, token, options, characters, cancellationToken);
}

private static VirtualCharSequence CleanupWhitespace(VirtualCharSequence characters)
{
using var _ = ArrayBuilder<VirtualCharSequence>.GetInstance(out var lines);

// First, determine all the lines in the content.
BreakIntoLines(characters, lines);

// Remove the leading and trailing line if they are all whitespace.
while (lines.Count > 0 && AllWhitespace(lines.First()))
lines.RemoveAt(0);

while (lines.Count > 0 && AllWhitespace(lines.Last()))
lines.RemoveAt(lines.Count - 1);

if (lines.Count == 0)
return VirtualCharSequence.Empty;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this check be moved up and prevent the fix from being offered? I can't think of why someone would have a bunch of whitespace is a verbatim string, but offering to convert it to an empty string is probably not what they want to happen.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair point. Updated to not offer in that case.


// Use the remaining lines to figure out what common whitespace we have.
var commonWhitespacePrefix = ComputeCommonWhitespacePrefix(lines);

var result = ImmutableSegmentedList.CreateBuilder<VirtualChar>();

foreach (var line in lines)
{
ConvertToRawKind.SingleLine => ConvertToSingleLineRawString(token),
ConvertToRawKind.MultiLine => ConvertToMultiLineRawString(token, options.NewLine),
ConvertToRawKind.MultiLineIndented => ConvertToMultiLineRawIndentedString(document, token, options, cancellationToken),
_ => throw ExceptionUtilities.UnexpectedValue(kind),
};
var lineEnumerable = line.AsEnumerable();
if (AllWhitespace(line))
{
// For an all-whitespace line, just add the trailing newlines on the line (if present).
result.AddRange(lineEnumerable.SkipWhile(IsCSharpWhitespace));
}
else
{
// Normal line. Skip the common whitespace.
result.AddRange(lineEnumerable.Skip(commonWhitespacePrefix));
}
}

// Remove all trailing whitespace and newlines from the final string.
while (result.Count > 0 && (IsCSharpNewLine(result[^1]) || IsCSharpWhitespace(result[^1])))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wasn't this done here?
https://github.com/dotnet/roslyn/pull/60936/files#diff-0428e947f9429e42713669adec6cd527db6d211b4197c9fdf2b6af0cf55cd77aR190

We can't add more trailing whitespace as part of skipping the common ones can we?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's slightly different. the first loop is removing entirely blank lines. The last loop is removing any trailing newline from the last line (which is not an allwhitespace line (since htat would have already been removed)).

result.RemoveAt(result.Count - 1);

return VirtualCharSequence.Create(result.ToImmutable());
}

private static SyntaxToken ConvertToMultiLineRawIndentedString(Document document, SyntaxToken token, SyntaxFormattingOptions formattingOptions, CancellationToken cancellationToken)
private static int ComputeCommonWhitespacePrefix(ArrayBuilder<VirtualCharSequence> lines)
{
var characters = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token);
Contract.ThrowIfTrue(characters.IsDefaultOrEmpty);
var commonLeadingWhitespace = GetLeadingWhitespace(lines.First());

for (var i = 1; i < lines.Count; i++)
{
if (commonLeadingWhitespace.IsEmpty)
return 0;

var currentLine = lines[i];
if (AllWhitespace(currentLine))
continue;

var currentLineLeadingWhitespace = GetLeadingWhitespace(currentLine);
commonLeadingWhitespace = ComputeCommonWhitespacePrefix(commonLeadingWhitespace, currentLineLeadingWhitespace);
}

return commonLeadingWhitespace.Length;
}

private static VirtualCharSequence ComputeCommonWhitespacePrefix(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could this just be an int?

Where you consume the current line text up until either the current 'common' index or a non-whitespace char?

Or maybe just a simple extension to virtual char that is GetFirstNonWhitespaceIndex and then compare the indices? Think that could replace some other helpers (AllWhitespace) as well

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'll see if i can do that! :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, this can't be an int. if it's an int, we can't distinguish spaces/tabs.

VirtualCharSequence leadingWhitespace1, VirtualCharSequence leadingWhitespace2)
{
var length = Math.Min(leadingWhitespace1.Length, leadingWhitespace2.Length);

var current = 0;
while (current < length && IsCSharpWhitespace(leadingWhitespace1[current]) && leadingWhitespace1[current].Rune == leadingWhitespace2[current].Rune)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we care about cases of mixed tabs/spaces?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ok with thsi only trimming off the part it is certain is shared across the files. If you mix tabs/spaces... well... that's either rare or bizarre, and i'm ok ignoring :)

current++;

return leadingWhitespace1.GetSubSequence(TextSpan.FromBounds(0, current));
}

private static VirtualCharSequence GetLeadingWhitespace(VirtualCharSequence line)
{
var current = 0;
while (current < line.Length && IsCSharpWhitespace(line[current]))
current++;

return line.GetSubSequence(TextSpan.FromBounds(0, current));
}

private static void BreakIntoLines(VirtualCharSequence characters, ArrayBuilder<VirtualCharSequence> lines)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should virtualcharsequence have a split extension that takes in the characters to split on?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

possibly. though \r\n would be a PITA to have to deal with :(

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah good point

{
var index = 0;

while (index < characters.Length)
lines.Add(GetNextLine(characters, ref index));
}

private static VirtualCharSequence GetNextLine(
VirtualCharSequence characters,
ref int index)
{
var end = index;
while (end < characters.Length && !IsCSharpNewLine(characters[end]))
end++;

if (end != characters.Length)
end += IsCarriageReturnNewLine(characters, end) ? 2 : 1;

var result = characters.GetSubSequence(TextSpan.FromBounds(index, end));
index = end;
return result;
}

private static bool AllWhitespace(VirtualCharSequence line)
{
var index = 0;
while (index < line.Length && IsCSharpWhitespace(line[index]))
index++;

return index == line.Length || IsCSharpNewLine(line[index]);
}

private static SyntaxToken ConvertToMultiLineRawIndentedString(
Document document,
SyntaxToken token,
SyntaxFormattingOptions formattingOptions,
VirtualCharSequence characters,
CancellationToken cancellationToken)
{
// Have to make sure we have a delimiter longer than any quote sequence in the string.
var longestQuoteSequence = GetLongestQuoteSequence(characters);
var quoteDelimeterCount = Math.Max(3, longestQuoteSequence + 1);
Expand All @@ -159,7 +318,7 @@ private static SyntaxToken ConvertToMultiLineRawIndentedString(Document document
for (int i = 0, n = characters.Length; i < n; i++)
{
var ch = characters[i];
if (IsNewLine(ch))
if (IsCSharpNewLine(ch))
{
ch.AppendTo(builder);
atStartOfLine = true;
Expand Down Expand Up @@ -187,39 +346,9 @@ private static SyntaxToken ConvertToMultiLineRawIndentedString(Document document
token.TrailingTrivia);
}

private static SyntaxToken ConvertToMultiLineRawString(SyntaxToken token, string newLine)
private static SyntaxToken ConvertToSingleLineRawString(
SyntaxToken token, VirtualCharSequence characters)
{
var characters = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token);
Contract.ThrowIfTrue(characters.IsDefaultOrEmpty);

// Have to make sure we have a delimiter longer than any quote sequence in the string.
var longestQuoteSequence = GetLongestQuoteSequence(characters);
var quoteDelimeterCount = Math.Max(3, longestQuoteSequence + 1);

using var _ = PooledStringBuilder.GetInstance(out var builder);

builder.Append('"', quoteDelimeterCount);
builder.Append(newLine);

foreach (var ch in characters)
ch.AppendTo(builder);

builder.Append(newLine);
builder.Append('"', quoteDelimeterCount);

return SyntaxFactory.Token(
token.LeadingTrivia,
SyntaxKind.MultiLineRawStringLiteralToken,
builder.ToString(),
characters.CreateString(),
token.TrailingTrivia);
}

private static SyntaxToken ConvertToSingleLineRawString(SyntaxToken token)
{
var characters = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token);
Contract.ThrowIfTrue(characters.IsDefaultOrEmpty);

// Have to make sure we have a delimiter longer than any quote sequence in the string.
var longestQuoteSequence = GetLongestQuoteSequence(characters);
var quoteDelimeterCount = Math.Max(3, longestQuoteSequence + 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

using System;
using System.Globalization;
using System.Reflection.PortableExecutable;
using System.Text;
using Microsoft.CodeAnalysis.Completion;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.Shared.Extensions;
Expand All @@ -23,15 +25,25 @@ public static bool CanBeSingleLine(VirtualCharSequence characters)
}

// a single line raw string cannot contain a newline.
if (characters.Any(static ch => IsNewLine(ch)))
if (characters.Any(static ch => IsCSharpNewLine(ch)))
return false;

return true;
}

public static bool IsNewLine(VirtualChar ch)
public static bool IsCSharpNewLine(VirtualChar ch)
=> ch.Rune.Utf16SequenceLength == 1 && SyntaxFacts.IsNewLine((char)ch.Value);

public static bool IsCSharpWhitespace(VirtualChar ch)
=> ch.Rune.Utf16SequenceLength == 1 && SyntaxFacts.IsWhitespace((char)ch.Value);

public static bool IsCarriageReturnNewLine(VirtualCharSequence characters, int index)
{
return index + 1 < characters.Length &&
characters[index].Rune is { Utf16SequenceLength: 1, Value: '\r' } &&
characters[index + 1].Rune is { Utf16SequenceLength: 1, Value: '\n' };
}

public static bool AllEscapesAreQuotes(VirtualCharSequence sequence)
=> AllEscapesAre(sequence, static ch => ch.Value == '"');

Expand Down Expand Up @@ -80,7 +92,7 @@ public static bool CanConvert(VirtualChar ch)
if (ch.Span.Length > 1)
{
// An escaped newline is fine to convert (to a multi-line raw string).
if (IsNewLine(ch))
if (IsCSharpNewLine(ch))
return true;

// Control/formatting unicode escapes should stay as escapes. The user code will just be enormously
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading