Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add limited support for backtracking Regex single char loops to simplified code gen #60385

Merged
merged 3 commits into from
Oct 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
<Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs" Link="Production\ValueStringBuilder.cs" />
<Compile Include="$(CoreLibSharedDir)System\Collections\Generic\ValueListBuilder.cs" Link="Production\ValueListBuilder.cs" />
<Compile Include="..\src\System\Collections\Generic\ValueListBuilder.Pop.cs" Link="Production\ValueListBuilder.Pop.cs" />
<Compile Include="..\src\System\Threading\StackHelper.cs" Link="Production\StackHelper.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexBoyerMoore.cs" Link="Production\RegexBoyerMoore.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.cs" Link="Production\RegexCharClass.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" Link="Production\RegexCharClass.MappingTable.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<ItemGroup>
<Compile Include="System\Collections\HashtableExtensions.cs" />
<Compile Include="System\Collections\Generic\ValueListBuilder.Pop.cs" />
<Compile Include="System\Threading\StackHelper.cs" />
<Compile Include="System\Text\SegmentStringBuilder.cs" />
<Compile Include="System\Text\RegularExpressions\Capture.cs" />
<Compile Include="System\Text\RegularExpressions\CaptureCollection.cs" />
Expand All @@ -17,8 +18,8 @@
<Compile Include="System\Text\RegularExpressions\MatchCollection.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Cache.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Match.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Debug.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Match.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Replace.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
Expand Down Expand Up @@ -53,7 +54,6 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermClassifier.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\RegexNodeToSymbolicConverter.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\StackHelper.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicMatch.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicNFA.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexBuilder.cs" />
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -1659,12 +1659,6 @@ private char ScanControl()
throw MakeException(RegexParseError.UnrecognizedControlCharacter, SR.UnrecognizedControlCharacter);
}

/// <summary>Returns true for options allowed only at the top level</summary>
private bool IsOnlyTopOption(RegexOptions options) =>
options == RegexOptions.RightToLeft ||
options == RegexOptions.CultureInvariant ||
options == RegexOptions.ECMAScript;

/// <summary>Scans cimsx-cimsx option string, stops at the first unrecognized char.</summary>
private void ScanOptions()
{
Expand All @@ -1683,7 +1677,7 @@ private void ScanOptions()
else
{
RegexOptions options = OptionFromCode(ch);
if (options == 0 || IsOnlyTopOption(options))
if (options == 0)
{
return;
}
Expand Down Expand Up @@ -1804,15 +1798,13 @@ private static RegexOptions OptionFromCode(char ch)
return ch switch
{
'i' => RegexOptions.IgnoreCase,
'r' => RegexOptions.RightToLeft,
'm' => RegexOptions.Multiline,
'n' => RegexOptions.ExplicitCapture,
's' => RegexOptions.Singleline,
'x' => RegexOptions.IgnorePatternWhitespace,
#if DEBUG
'd' => RegexOptions.Debug,
#endif
'e' => RegexOptions.ECMAScript,
_ => 0,
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Threading;

namespace System.Text.RegularExpressions.Symbolic
{
Expand Down Expand Up @@ -201,11 +202,9 @@ BDD MapCategoryCodeToCondition(int code) =>
public SymbolicRegexNode<BDD> Convert(RegexNode node, bool topLevel)
{
// Guard against stack overflow due to deep recursion
if (!RuntimeHelpers.TryEnsureSufficientExecutionStack())
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
RegexNode localNode = node;
bool localTopLevel = topLevel;
return StackHelper.CallOnEmptyStack(() => Convert(localNode, localTopLevel));
return StackHelper.CallOnEmptyStack(Convert, node, topLevel);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
}

switch (node.Type)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Threading;

namespace System.Text.RegularExpressions.Symbolic
{
Expand Down Expand Up @@ -618,11 +619,10 @@ public SymbolicRegexNode<S> Restrict(S pred)
/// </summary>
public int GetFixedLength()
{
// Guard against stack overflow due to deep recursion.
if (!RuntimeHelpers.TryEnsureSufficientExecutionStack())
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
SymbolicRegexNode<S> thisRef = this;
return StackHelper.CallOnEmptyStack(() => thisRef.GetFixedLength());
// If we can't recur further, assume no fixed length.
return -1;
}

switch (_kind)
Expand Down Expand Up @@ -690,11 +690,9 @@ public int GetFixedLength()
internal SymbolicRegexNode<S> MkDerivative(S elem, uint context)
{
// Guard against stack overflow due to deep recursion
if (!RuntimeHelpers.TryEnsureSufficientExecutionStack())
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
S localElem = elem;
uint localContext = context;
return StackHelper.CallOnEmptyStack(() => MkDerivative(localElem, localContext));
return StackHelper.CallOnEmptyStack(MkDerivative, elem, context);
}

if (this == _builder._anyStar || this == _builder._nothing)
Expand Down Expand Up @@ -1100,10 +1098,9 @@ public override string ToString()
internal void ToString(StringBuilder sb)
{
// Guard against stack overflow due to deep recursion
if (!RuntimeHelpers.TryEnsureSufficientExecutionStack())
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
StringBuilder localSb = sb;
StackHelper.CallOnEmptyStack(() => ToString(localSb));
StackHelper.CallOnEmptyStack(ToString, sb);
return;
}

Expand Down Expand Up @@ -1665,12 +1662,9 @@ private S ComputeStartSet()
internal SymbolicRegexNode<S> PruneAnchors(uint prevKind, bool contWithWL, bool contWithNWL)
{
// Guard against stack overflow due to deep recursion
if (!RuntimeHelpers.TryEnsureSufficientExecutionStack())
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
uint localPrevKind = prevKind;
bool localContWithWL = contWithWL;
bool localContWithNWL = contWithNWL;
return StackHelper.CallOnEmptyStack(() => PruneAnchors(localPrevKind, localContWithWL, localContWithNWL));
return StackHelper.CallOnEmptyStack(PruneAnchors, prevKind, contWithWL, contWithNWL);
}

if (!_info.StartsWithSomeAnchor)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;
using System.Threading.Tasks;

namespace System.Threading
{
    /// <summary>Provides tools for avoiding stack overflows.</summary>
    internal static class StackHelper
    {
        /// <summary>Tries to ensure there is sufficient stack to execute the average .NET function.</summary>
        /// <returns>
        /// <see langword="true"/> if there is sufficient stack to continue executing;
        /// otherwise, <see langword="false"/>.
        /// </returns>
        public static bool TryEnsureSufficientExecutionStack()
        {
#if REGEXGENERATOR
            // This build configuration does not use RuntimeHelpers.TryEnsureSufficientExecutionStack,
            // so emulate it on top of the throwing variant.
            try
            {
                RuntimeHelpers.EnsureSufficientExecutionStack();
                return true;
            }
            catch (InsufficientExecutionStackException)
            {
                // Only the "not enough stack" signal maps to false; any other failure
                // is unexpected and must not be silently reported as low stack.
                return false;
            }
#else
            return RuntimeHelpers.TryEnsureSufficientExecutionStack();
#endif
        }

        // Implementation note for all CallOnEmptyStack overloads below:
        // Each queues the supplied delegate to the thread pool, then blocks waiting for it to complete.
        // It does so in a way that prevents task inlining (which would defeat the purpose) but that
        // also plays nicely with the thread pool's sync-over-async aggressive thread injection policies.
        // The wait is performed on a synchronously-executed continuation rather than on the queued task
        // itself, and the continuation's GetAwaiter().GetResult() propagates any exception thrown by
        // the delegate back to the caller.

        /// <summary>Calls the provided action on the stack of a different thread pool thread.</summary>
        /// <typeparam name="TArg1">The type of the first argument to pass to the action.</typeparam>
        /// <param name="action">The action to invoke.</param>
        /// <param name="arg1">The first argument to pass to the action.</param>
        public static void CallOnEmptyStack<TArg1>(Action<TArg1> action, TArg1 arg1) =>
            Task.Run(() => action(arg1))
                .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
                .GetAwaiter().GetResult();

        /// <summary>Calls the provided action on the stack of a different thread pool thread.</summary>
        /// <typeparam name="TArg1">The type of the first argument to pass to the action.</typeparam>
        /// <typeparam name="TArg2">The type of the second argument to pass to the action.</typeparam>
        /// <typeparam name="TArg3">The type of the third argument to pass to the action.</typeparam>
        /// <param name="action">The action to invoke.</param>
        /// <param name="arg1">The first argument to pass to the action.</param>
        /// <param name="arg2">The second argument to pass to the action.</param>
        /// <param name="arg3">The third argument to pass to the action.</param>
        public static void CallOnEmptyStack<TArg1, TArg2, TArg3>(Action<TArg1, TArg2, TArg3> action, TArg1 arg1, TArg2 arg2, TArg3 arg3) =>
            Task.Run(() => action(arg1, arg2, arg3))
                .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
                .GetAwaiter().GetResult();

        /// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
        /// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
        /// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
        /// <typeparam name="TResult">The return type of the function.</typeparam>
        /// <param name="func">The function to invoke.</param>
        /// <param name="arg1">The first argument to pass to the function.</param>
        /// <param name="arg2">The second argument to pass to the function.</param>
        /// <returns>The value returned by <paramref name="func"/>.</returns>
        public static TResult CallOnEmptyStack<TArg1, TArg2, TResult>(Func<TArg1, TArg2, TResult> func, TArg1 arg1, TArg2 arg2) =>
            Task.Run(() => func(arg1, arg2))
                .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
                .GetAwaiter().GetResult();

        /// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
        /// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
        /// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
        /// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
        /// <typeparam name="TResult">The return type of the function.</typeparam>
        /// <param name="func">The function to invoke.</param>
        /// <param name="arg1">The first argument to pass to the function.</param>
        /// <param name="arg2">The second argument to pass to the function.</param>
        /// <param name="arg3">The third argument to pass to the function.</param>
        /// <returns>The value returned by <paramref name="func"/>.</returns>
        public static TResult CallOnEmptyStack<TArg1, TArg2, TArg3, TResult>(Func<TArg1, TArg2, TArg3, TResult> func, TArg1 arg1, TArg2 arg2, TArg3 arg3) =>
            Task.Run(() => func(arg1, arg2, arg3))
                .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
                .GetAwaiter().GetResult();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ public static IEnumerable<object[]> Match_MemberData()
// for it to be a success. For a correct match, remove the last character, '3' from the pattern
yield return ("[^0-9]+(?>[0-9]+)3", "abc123", RegexOptions.None, 0, 6, false, string.Empty);
yield return ("[^0-9]+(?>[0-9]+)", "abc123", RegexOptions.None, 0, 6, true, "abc123");

yield return (@"(?!.*a)\w*g", "bcaefg", RegexOptions.None, 0, 6, true, "efg");
yield return (@"(?!.*a)\w*g", "aaaaag", RegexOptions.None, 0, 6, true, "g");
yield return (@"(?!.*a)\w*g", "aaaaaa", RegexOptions.None, 0, 6, false, string.Empty);
}

// More nonbacktracking expressions
Expand Down Expand Up @@ -175,6 +179,12 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@".*", "abc", lineOption, 2, 1, true, "c");
}

// Nested loops
if (!RegexHelpers.IsNonBacktracking(engine))
{
yield return ("a*(?:a[ab]*)*", "aaaababbbbbbabababababaaabbb", RegexOptions.None, 0, 28, true, "aaaa");
}

// Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z"
yield return (@"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.IgnoreCase, 0, 17, true, "aaaasdfajsdlfjzzz");
yield return (@"\Aaaaaa\w+zzz\Z", "aaaa", RegexOptions.IgnoreCase, 0, 4, false, string.Empty);
Expand Down Expand Up @@ -344,7 +354,13 @@ public static IEnumerable<object[]> Match_MemberData()
yield return ("(?(cat)dog1|dog2)", "catdog1", RegexOptions.None, 0, 7, false, string.Empty);
yield return ("(?(cat)dog1|dog2)", "catdog2", RegexOptions.None, 0, 7, true, "dog2");
yield return ("(?(cat)dog1|dog2)", "catdog1dog2", RegexOptions.None, 0, 11, true, "dog2");
yield return (@"(\w+|\d+)a+[ab]+", "123123aa", RegexOptions.None, 0, 8, true, "123123aa");
yield return ("(a|ab|abc|abcd)d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.None, 0, 4, false, string.Empty);
yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
}
yield return ("[^a-z0-9]etag|[^a-z0-9]digest", "this string has .digest as a substring", RegexOptions.None, 16, 7, true, ".digest");
yield return (@"a\w*a|def", "aaaaa", RegexOptions.None, 0, 5, true, "aaaaa");

// No Negation
yield return ("[abcd-[abcd]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
Expand Down Expand Up @@ -1584,7 +1600,7 @@ public static IEnumerable<object[]> AllMatches_TestData()
};

// Case insensitive cases by using ?i and some non-ASCII characters like Kelvin sign and applying ?i over negated character classes
yield return new object[] { engine, "(?i:[a-]+k*)", RegexOptions.None, "xyxaBõc\u212AKAyy", new (int, int, string)[] { (3, 6, "aBõc\u212AK"), (9, 1, "A") } };
yield return new object[] { engine, "(?i:[a-d\u00D5]+k*)", RegexOptions.None, "xyxaB\u00F5c\u212AKAyy", new (int, int, string)[] { (3, 6, "aB\u00F5c\u212AK"), (9, 1, "A") } };
yield return new object[] { engine, "(?i:[a-d]+)", RegexOptions.None, "xyxaBcyy", new (int, int, string)[] { (3, 3, "aBc") } };
yield return new object[] { engine, "(?i:[\0-@B-\uFFFF]+)", RegexOptions.None, "xaAaAy", new (int, int, string)[] { (0, 6, "xaAaAy") } }; // this is the same as .+
yield return new object[] { engine, "(?i:[\0-ac-\uFFFF])", RegexOptions.None, "b", new (int, int, string)[] { (0, 1, "b") } };
Expand Down
Loading