From c9f9697fe7d10c0c1d3f17cb258518bc8d4e7cc6 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Fri, 3 Dec 2021 12:52:18 -0700 Subject: [PATCH] Add parser tests from nim-regex (#62093) * new parser tests * baseline * Nim tests * typos * positive cases * new parser tests * change to \u --- .../tests/RegexParserTests.cs | 26 +++- .../tests/RegexParserTests.netcoreapp.cs | 118 +++++++++++++++++- .../tests/RegexParserTests.netfx.cs | 2 +- .../tests/THIRD-PARTY-NOTICES.TXT | 27 +++- 4 files changed, 166 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs index 846fcbce60fa6..8fa496d6d7bdb 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs @@ -714,11 +714,12 @@ private static void Parse(string pattern, RegexOptions options, RegexParseError? if (error != null) { Assert.InRange(offset, 0, int.MaxValue); - Throws(error.Value, offset, () => new Regex(pattern, options)); + Throws(pattern, options, error.Value, offset, () => new Regex(pattern, options)); return; } Assert.Equal(-1, offset); + LogActual(pattern, options, RegexParseError.Unknown, -1); // Nothing to assert here without having access to internals. new Regex(pattern, options); // Does not throw @@ -726,6 +727,27 @@ private static void Parse(string pattern, RegexOptions options, RegexParseError? ParsePatternFragments(pattern, options); } + private static void LogActual(string pattern, RegexOptions options, RegexParseError error, int offset) + { + // To conveniently add new interesting patterns to these tests, add them to the code in the format: + // + // [InlineData("SOMEREGEX1", RegexOptions.None, null)] + // [InlineData("SOMEREGEX2", RegexOptions.None, null)] + // ... 
+ // + // then uncomment the lines below, and the correct baseline will be written to the file, e.g. + // + // [InlineData(@"SOMEREGEX1", RegexOptions.None, RegexParseError.UnrecognizedEscape, 3)] + // [InlineData(@"SOMEREGEX2", RegexOptions.None, RegexParseError.InsufficientClosingParentheses, 2)] + // ... + // + //string s = (error == RegexParseError.Unknown) ? + // @$" [InlineData(@""{pattern}"", RegexOptions.{options.ToString()}, null)]" : + // @$" [InlineData(@""{pattern}"", RegexOptions.{options.ToString()}, RegexParseError.{error.ToString()}, {offset})]"; + + // File.AppendAllText(@"/tmp/out.cs", s + "\n"); + } + private static void ParsePatternFragments(string pattern, RegexOptions options) { // Trim the input in various places and parse. @@ -755,7 +777,7 @@ private static void ParsePatternFragments(string pattern, RegexOptions options) /// /// The expected parse error /// The action to invoke. - static partial void Throws(RegexParseError error, int offset, Action action); + static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action); /// /// Checks that action succeeds or throws either a RegexParseException or an ArgumentException depending on the diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netcoreapp.cs index cc1d5f8658fe9..13ff23b7dde4d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netcoreapp.cs @@ -110,7 +110,7 @@ public partial class RegexParserTests [InlineData(@"(?P.)(?P.)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] [InlineData(@"[a-\A]", RegexOptions.None, RegexParseError.UnrecognizedEscape, 5)] [InlineData(@"[a-\z]", RegexOptions.None, RegexParseError.UnrecognizedEscape, 5)] - [InlineData(@"[a-\b]", RegexOptions.None, 
RegexParseError.ReversedCharacterRange, 5)] + [InlineData(@"[a-\b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)] // Nim: not an error [InlineData(@"[a-\-]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)] [InlineData(@"[a-\-b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)] [InlineData(@"[a-\-\-b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)] @@ -127,6 +127,115 @@ public partial class RegexParserTests [InlineData(@"[a-[:lower:]]", RegexOptions.None, null)] // errors in rust: range_end_no_class // End of Rust parser tests ============== + // Following are borrowed from Nim tests + // https://github.com/nitely/nim-regex/blob/eeefb4f51264ff3bc3b36caf55672a74f52f5ef5/tests/tests.nim + [InlineData(@"?", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"?|?", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"?abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] // Nim: not an error + [InlineData(@"(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] + [InlineData(@"(?u-q)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] + [InlineData(@"(?uq)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] + [InlineData(@"(\b)", RegexOptions.None, null)] + [InlineData(@"(+)", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 2)] + [InlineData(@"(a)b)", RegexOptions.None, RegexParseError.InsufficientOpeningParentheses, 5)] + [InlineData(@"(b(a)", RegexOptions.None, RegexParseError.InsufficientClosingParentheses, 5)] + [InlineData(@"[-", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)] + [InlineData(@"[-a", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)] + [InlineData(@"[[:abc:]]", RegexOptions.None, null)] // Nim: "Invalid ascii set. 
`abc` is not a valid name" + [InlineData(@"[[:alnum:", RegexOptions.None, RegexParseError.UnterminatedBracket, 9)] + [InlineData(@"[[:alnum]]", RegexOptions.None, null)] // Nim: "Invalid ascii set. Expected [:name:]" + [InlineData(@"[]", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)] + [InlineData(@"[]a", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)] + [InlineData(@"[]abc", RegexOptions.None, RegexParseError.UnterminatedBracket, 5)] + [InlineData(@"[\\", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)] + [InlineData(@"[^]", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)] + [InlineData(@"[a-", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)] + [InlineData(@"[a-\w]", RegexOptions.None, RegexParseError.ShorthandClassInCharacterRange, 5)] + [InlineData(@"[a", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)] + [InlineData(@"[abc", RegexOptions.None, RegexParseError.UnterminatedBracket, 4)] + [InlineData(@"[d-c]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 4)] + [InlineData(@"[z-[:alnum:]]", RegexOptions.None, null)] // Nim: "Invalid set range. Start must be lesser than end" + [InlineData(@"{10}", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"*abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"\12", RegexOptions.None, null)] // Nim: "Invalid octal literal. Expected 3 octal digits, but found 2" + [InlineData(@"\12@", RegexOptions.None, null)] // Nim: "Invalid octal literal. 
Expected octal digit, but found @" + [InlineData(@"\b?", RegexOptions.None, null)] + [InlineData(@"\b*", RegexOptions.None, null)] + [InlineData(@"\b+", RegexOptions.None, null)] + [InlineData(@"\p{11", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 5)] + [InlineData(@"\p{11}", RegexOptions.None, RegexParseError.UnrecognizedUnicodeProperty, 6)] + [InlineData(@"\p{Bb}", RegexOptions.None, RegexParseError.UnrecognizedUnicodeProperty, 6)] + [InlineData(@"\p11", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 2)] + [InlineData(@"\pB", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 2)] + [InlineData(@"\u123", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 2)] + [InlineData(@"\U123", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)] + [InlineData(@"\U123@a", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)] + [InlineData(@"\u123@abc", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 6)] + [InlineData(@"\UFFFFFFFF", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)] + [InlineData(@"\x{00000000A}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] + [InlineData(@"\x{2f894", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] + [InlineData(@"\x{61@}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] + [InlineData(@"\x{7fffffff}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] // Nim: not an error (supports Unicode beyond basic multilingual plane) + [InlineData(@"\x{FFFFFFFF}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] + [InlineData(@"+", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"+abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)] + [InlineData(@"a???", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)] + [InlineData(@"a??*", RegexOptions.None, 
RegexParseError.NestedQuantifiersNotParenthesized, 4)] + [InlineData(@"a??+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)] + [InlineData(@"a?*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)] + [InlineData(@"a?+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)] + [InlineData(@"a(?P<>abc)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 4)] + [InlineData(@"a(?P /// The expected parse error /// The action to invoke. - static partial void Throws(RegexParseError error, int offset, Action action) + static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action) { try { @@ -171,16 +280,19 @@ static partial void Throws(RegexParseError error, int offset, Action action) if (error == regexParseError) { Assert.Equal(offset, e.Offset); + LogActual(pattern, options, regexParseError, e.Offset); return; } + LogActual(pattern, options, regexParseError, e.Offset); throw new XunitException($"Expected RegexParseException with error {error} offset {offset} -> Actual error: {regexParseError} offset {e.Offset})"); } catch (Exception e) { - throw new XunitException($"Expected RegexParseException -> Actual: ({e})"); + throw new XunitException($"Expected RegexParseException for pattern '{pattern}' -> Actual: ({e})"); } + LogActual(pattern, options, RegexParseError.Unknown, -1); throw new XunitException($"Expected RegexParseException with error: ({error}) -> Actual: No exception thrown"); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netfx.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netfx.cs index e70d29a8dd388..b7b3d56505a3c 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netfx.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netfx.cs @@ -16,7 +16,7 @@ public partial class RegexParserTests /// /// The expected parse 
error /// The action to invoke. - static partial void Throws(RegexParseError error, int offset, Action action) + static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action) { try { diff --git a/src/libraries/System.Text.RegularExpressions/tests/THIRD-PARTY-NOTICES.TXT b/src/libraries/System.Text.RegularExpressions/tests/THIRD-PARTY-NOTICES.TXT index 8c51928cfb1e7..b5f4599ed3a33 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/THIRD-PARTY-NOTICES.TXT +++ b/src/libraries/System.Text.RegularExpressions/tests/THIRD-PARTY-NOTICES.TXT @@ -35,4 +35,29 @@ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. \ No newline at end of file +DEALINGS IN THE SOFTWARE. + +License notice for https://github.com/nitely/nim-regex +------------------------------- + +MIT License + +Copyright (c) 2017 Esteban Castro Borsani + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file