feat: no-misleading-character-class granular errors (#17515)

* feat: `no-misleading-character-class` granular errors * fix: column offsets * fix: missing CallExpression * Apply suggestions from code review Co-authored-by: Francesco Trotta <[email protected]> * All tests passing again * Edge case: quadruple back slashes * Apply suggestions from code review Co-authored-by: Francesco Trotta <[email protected]> * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * Adjusted for repeat characters * Separated kinds into a Set * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * Adjust unit tests for accepted changes * Split into adjustment function for sequence pairs * Reduced complexity because of edge cases, as requested * Use chars array as suggested - mostly there * Checked that slices include first and last * Used sourceCode.getText(node) over node.raw, and added comments * Update lib/rules/no-misleading-character-class.js * Drastically limit applicability * Restricted regexp literals on \u * Update lib/rules/no-misleading-character-class.js Co-authored-by: Milos Djermanovic <[email protected]> * Corrected erroneous escaped u exclusion * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * If a report range is missing, skip other reports * Update lib/rules/no-misleading-character-class.js Co-authored-by: Milos Djermanovic <[email protected]> * Merge branch 'main' * Implement fasttime suggestion for a Map * nit: for loop * Unify zwj reports * Added a couple of multiline tests * use .at(-1) --------- Co-authored-by: Francesco Trotta <[email protected]> Co-authored-by: Milos Djermanovic <[email protected]>
eslint · Jan 7, 2024 · 287c4b7 · 287c4b7
1 parent d31c180
commit 287c4b7
Show file tree

Hide file tree

Showing 2 changed files with 777 additions and 152 deletions.
diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js
@@ -62,7 +62,6 @@ function *iterateCharacterSequence(nodes) {
     }
 }
 
-
 /**
  * Checks whether the given character node is a Unicode code point escape or not.
  * @param {Character} char the character node to check.
@@ -73,80 +72,120 @@ function isUnicodeCodePointEscape(char) {
 }
 
 /**
- * Each function returns `true` if it detects that kind of problem.
- * @type {Record<string, (chars: Character[]) => boolean>}
+ * Each function returns matched characters if it detects that kind of problem.
+ * @type {Record<string, (char: Character, index: number, chars: Character[]) => Character[] | null>}
  */
-const hasCharacterSequence = {
-    surrogatePairWithoutUFlag(chars) {
-        return chars.some((c, i) => {
-            if (i === 0) {
-                return false;
-            }
-            const c1 = chars[i - 1];
-
-            return (
-                isSurrogatePair(c1.value, c.value) &&
-                !isUnicodeCodePointEscape(c1) &&
-                !isUnicodeCodePointEscape(c)
-            );
-        });
+const characterSequenceIndexFilters = {
+    surrogatePairWithoutUFlag(char, index, chars) {
+        if (index === 0) {
+            return null;
+        }
+
+        const previous = chars[index - 1];
+
+        if (
+            isSurrogatePair(previous.value, char.value) &&
+            !isUnicodeCodePointEscape(previous) &&
+            !isUnicodeCodePointEscape(char)
+        ) {
+            return [previous, char];
+        }
+
+        return null;
     },
 
-    surrogatePair(chars) {
-        return chars.some((c, i) => {
-            if (i === 0) {
-                return false;
-            }
-            const c1 = chars[i - 1];
-
-            return (
-                isSurrogatePair(c1.value, c.value) &&
-                (
-                    isUnicodeCodePointEscape(c1) ||
-                    isUnicodeCodePointEscape(c)
-                )
-            );
-        });
+    surrogatePair(char, index, chars) {
+        if (index === 0) {
+            return null;
+        }
+
+        const previous = chars[index - 1];
+
+        if (
+            isSurrogatePair(previous.value, char.value) &&
+            (
+                isUnicodeCodePointEscape(previous) ||
+                isUnicodeCodePointEscape(char)
+            )
+        ) {
+            return [previous, char];
+        }
+
+        return null;
     },
 
-    combiningClass(chars) {
-        return chars.some((c, i) => (
-            i !== 0 &&
-            isCombiningCharacter(c.value) &&
-            !isCombiningCharacter(chars[i - 1].value)
-        ));
+    combiningClass(char, index, chars) {
+        if (
+            index !== 0 &&
+            isCombiningCharacter(char.value) &&
+            !isCombiningCharacter(chars[index - 1].value)
+        ) {
+            return [chars[index - 1], char];
+        }
+
+        return null;
     },
 
-    emojiModifier(chars) {
-        return chars.some((c, i) => (
-            i !== 0 &&
-            isEmojiModifier(c.value) &&
-            !isEmojiModifier(chars[i - 1].value)
-        ));
+    emojiModifier(char, index, chars) {
+        if (
+            index !== 0 &&
+            isEmojiModifier(char.value) &&
+            !isEmojiModifier(chars[index - 1].value)
+        ) {
+            return [chars[index - 1], char];
+        }
+
+        return null;
     },
 
-    regionalIndicatorSymbol(chars) {
-        return chars.some((c, i) => (
-            i !== 0 &&
-            isRegionalIndicatorSymbol(c.value) &&
-            isRegionalIndicatorSymbol(chars[i - 1].value)
-        ));
+    regionalIndicatorSymbol(char, index, chars) {
+        if (
+            index !== 0 &&
+            isRegionalIndicatorSymbol(char.value) &&
+            isRegionalIndicatorSymbol(chars[index - 1].value)
+        ) {
+            return [chars[index - 1], char];
+        }
+
+        return null;
     },
 
-    zwj(chars) {
-        const lastIndex = chars.length - 1;
+    zwj(char, index, chars) {
+        if (
+            index !== 0 &&
+            index !== chars.length - 1 &&
+            char.value === 0x200d &&
+            chars[index - 1].value !== 0x200d &&
+            chars[index + 1].value !== 0x200d
+        ) {
+            return chars.slice(index - 1, index + 2);
+        }
 
-        return chars.some((c, i) => (
-            i !== 0 &&
-            i !== lastIndex &&
-            c.value === 0x200d &&
-            chars[i - 1].value !== 0x200d &&
-            chars[i + 1].value !== 0x200d
-        ));
+        return null;
     }
 };
 
-const kinds = Object.keys(hasCharacterSequence);
+const kinds = Object.keys(characterSequenceIndexFilters);
+
+/**
+ * Collects the character arrays the filter returns for each index of `chars`.
+ * @param {Character[]} chars Characters to run the filter on.
+ * @param {(char: Character, index: number, chars: Character[]) => Character[] | null} filter Finds matches for an index.
+ * @returns {Character[][]} Matched character sequences, one per index where the filter returned an array.
+ */
+function accumulate(chars, filter) {
+    const matchingChars = [];
+
+    chars.forEach((char, index) => {
+        const matches = filter(char, index, chars);
+
+        if (matches) {
+            matchingChars.push(matches);
+        }
+    });
+
+    return matchingChars;
+}
 
 //------------------------------------------------------------------------------
 // Rule Definition
@@ -181,6 +220,62 @@ module.exports = {
         const sourceCode = context.sourceCode;
         const parser = new RegExpParser();
 
+        /**
+         * Generates a granular loc for context.report, if directly calculable.
+         * @param {Character[]} chars Individual characters being reported on.
+         * @param {Node} node Parent string node to report within.
+         * @returns {Object | null} Granular loc for context.report, if directly calculable.
+         * @see https://github.com/eslint/eslint/pull/17515
+         */
+        function generateReportLocation(chars, node) {
+
+            // Limit to literals and expression-less templates with raw values === their value.
+            switch (node.type) {
+                case "TemplateLiteral":
+                    if (node.expressions.length || node.quasis[0].value.raw !== node.quasis[0].value.cooked) {
+                        return null;
+                    }
+                    break;
+
+                case "Literal":
+                    if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
+                        return null;
+                    }
+                    break;
+
+                default:
+                    return null;
+            }
+
+            return {
+                start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
+                end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end)
+            };
+        }
+
+        /**
+         * Finds the report loc(s) for a range of matches.
+         * @param {Character[][]} matches Characters that should trigger a report.
+         * @param {Node} node The node to report.
+         * @returns {Object[]} Node loc(s) for context.report; `[node.loc]` if any match has no granular loc.
+         */
+        function getNodeReportLocations(matches, node) {
+            const locs = [];
+
+            for (const chars of matches) {
+                const loc = generateReportLocation(chars, node);
+
+                // If any match can't be mapped to a granular range, report the whole node once instead
+                if (!loc) {
+                    return [node.loc];
+                }
+
+                locs.push(loc);
+            }
+
+            return locs;
+        }
+
         /**
          * Verify a given regular expression.
          * @param {Node} node The node to report.
@@ -208,21 +303,26 @@ module.exports = {
                 return;
             }
 
-            const foundKinds = new Set();
+            const foundKindMatches = new Map();
 
             visitRegExpAST(patternNode, {
                 onCharacterClassEnter(ccNode) {
                     for (const chars of iterateCharacterSequence(ccNode.elements)) {
                         for (const kind of kinds) {
-                            if (hasCharacterSequence[kind](chars)) {
-                                foundKinds.add(kind);
+                            const matches = accumulate(chars, characterSequenceIndexFilters[kind]);
+
+                            if (foundKindMatches.has(kind)) {
+                                foundKindMatches.get(kind).push(...matches);
+                            } else {
+                                foundKindMatches.set(kind, matches);
                             }
+
                         }
                     }
                 }
             });
 
-            for (const kind of foundKinds) {
+            for (const [kind, matches] of foundKindMatches) {
                 let suggest;
 
                 if (kind === "surrogatePairWithoutUFlag") {
@@ -232,11 +332,27 @@ module.exports = {
                     }];
                 }
 
-                context.report({
-                    node,
-                    messageId: kind,
-                    suggest
-                });
+                const locs = getNodeReportLocations(matches, node);
+
+                // Grapheme zero-width joiners (e.g. in 👨‍👩‍👦) visually show as one emoji
+                if (kind === "zwj" && locs.length > 1) {
+                    context.report({
+                        loc: {
+                            start: locs[0].start,
+                            end: locs[1].end
+                        },
+                        messageId: kind,
+                        suggest
+                    });
+                } else {
+                    for (const loc of locs) {
+                        context.report({
+                            loc,
+                            messageId: kind,
+                            suggest
+                        });
+                    }
+                }
             }
         }
 
@@ -267,7 +383,7 @@ module.exports = {
                     const flags = getStringIfConstant(flagsNode, scope);
 
                     if (typeof pattern === "string") {
-                        verify(refNode, pattern, flags || "", fixer => {
+                        verify(patternNode, pattern, flags || "", fixer => {
 
                             if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
                                 return null;