Skip to content

Commit

Permalink
feat: no-misleading-character-class granular errors (#17515)
Browse files Browse the repository at this point in the history
* feat: `no-misleading-character-class` granular errors

* fix: column offsets

* fix: missing CallExpression

* Apply suggestions from code review

Co-authored-by: Francesco Trotta <[email protected]>

* All tests passing again

* Edge case: quadruple back slashes

* Apply suggestions from code review

Co-authored-by: Francesco Trotta <[email protected]>

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Francesco Trotta <[email protected]>

* Adjusted for repeat characters

* Separated kinds into a Set

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Francesco Trotta <[email protected]>

* Adjust unit tests for accepted changes

* Split into adjustment function for sequence pairs

* Reduced complexity because of edge cases, as requested

* Use chars array as suggested - mostly there

* Checked that slices include first and last

* Used sourceCode.getText(node) over node.raw, and added comments

* Update lib/rules/no-misleading-character-class.js

* Drastically limit applicability

* Restricted regexp literals on \u

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Milos Djermanovic <[email protected]>

* Corrected erroneous escaped u exclusion

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Francesco Trotta <[email protected]>

* If a report range is missing, skip other reports

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Milos Djermanovic <[email protected]>

* Merge branch 'main'

* Implement fasttime suggestion for a Map

* nit: for loop

* Unify zwj reports

* Added a couple of multiline tests

* use .at(-1)

---------

Co-authored-by: Francesco Trotta <[email protected]>
Co-authored-by: Milos Djermanovic <[email protected]>
  • Loading branch information
3 people authored Jan 7, 2024
1 parent d31c180 commit 287c4b7
Show file tree
Hide file tree
Showing 2 changed files with 777 additions and 152 deletions.
256 changes: 186 additions & 70 deletions lib/rules/no-misleading-character-class.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ function *iterateCharacterSequence(nodes) {
}
}


/**
* Checks whether the given character node is a Unicode code point escape or not.
* @param {Character} char the character node to check.
Expand All @@ -73,80 +72,120 @@ function isUnicodeCodePointEscape(char) {
}

/**
* Each function returns `true` if it detects that kind of problem.
* @type {Record<string, (chars: Character[]) => boolean>}
* Each function returns matched characters if it detects that kind of problem.
* @type {Record<string, (char: Character, index: number, chars: Character[]) => Character[] | null>}
*/
const hasCharacterSequence = {
surrogatePairWithoutUFlag(chars) {
return chars.some((c, i) => {
if (i === 0) {
return false;
}
const c1 = chars[i - 1];

return (
isSurrogatePair(c1.value, c.value) &&
!isUnicodeCodePointEscape(c1) &&
!isUnicodeCodePointEscape(c)
);
});
const characterSequenceIndexFilters = {
surrogatePairWithoutUFlag(char, index, chars) {
if (index === 0) {
return null;
}

const previous = chars[index - 1];

if (
isSurrogatePair(previous.value, char.value) &&
!isUnicodeCodePointEscape(previous) &&
!isUnicodeCodePointEscape(char)
) {
return [previous, char];
}

return null;
},

surrogatePair(chars) {
return chars.some((c, i) => {
if (i === 0) {
return false;
}
const c1 = chars[i - 1];

return (
isSurrogatePair(c1.value, c.value) &&
(
isUnicodeCodePointEscape(c1) ||
isUnicodeCodePointEscape(c)
)
);
});
surrogatePair(char, index, chars) {
if (index === 0) {
return null;
}

const previous = chars[index - 1];

if (
isSurrogatePair(previous.value, char.value) &&
(
isUnicodeCodePointEscape(previous) ||
isUnicodeCodePointEscape(char)
)
) {
return [previous, char];
}

return null;
},

combiningClass(chars) {
return chars.some((c, i) => (
i !== 0 &&
isCombiningCharacter(c.value) &&
!isCombiningCharacter(chars[i - 1].value)
));
combiningClass(char, index, chars) {
if (
index !== 0 &&
isCombiningCharacter(char.value) &&
!isCombiningCharacter(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

emojiModifier(chars) {
return chars.some((c, i) => (
i !== 0 &&
isEmojiModifier(c.value) &&
!isEmojiModifier(chars[i - 1].value)
));
emojiModifier(char, index, chars) {
if (
index !== 0 &&
isEmojiModifier(char.value) &&
!isEmojiModifier(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

regionalIndicatorSymbol(chars) {
return chars.some((c, i) => (
i !== 0 &&
isRegionalIndicatorSymbol(c.value) &&
isRegionalIndicatorSymbol(chars[i - 1].value)
));
regionalIndicatorSymbol(char, index, chars) {
if (
index !== 0 &&
isRegionalIndicatorSymbol(char.value) &&
isRegionalIndicatorSymbol(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

zwj(chars) {
const lastIndex = chars.length - 1;
zwj(char, index, chars) {
if (
index !== 0 &&
index !== chars.length - 1 &&
char.value === 0x200d &&
chars[index - 1].value !== 0x200d &&
chars[index + 1].value !== 0x200d
) {
return chars.slice(index - 1, index + 2);
}

return chars.some((c, i) => (
i !== 0 &&
i !== lastIndex &&
c.value === 0x200d &&
chars[i - 1].value !== 0x200d &&
chars[i + 1].value !== 0x200d
));
return null;
}
};

const kinds = Object.keys(hasCharacterSequence);
const kinds = Object.keys(characterSequenceIndexFilters);

/**
 * Runs a per-index filter over a character sequence and gathers every match.
 * @param {Character[]} chars Characters to run the filter on.
 * @param {(char: Character, index: number, chars: Character[]) => Character[] | null} filter Returns the matched characters for an index, or a falsy value when nothing matches there.
 * @returns {Character[][]} The matched character groups, one per index where the filter matched, in index order.
 */
function accumulate(chars, filter) {
    return chars.reduce((collected, char, index) => {
        const found = filter(char, index, chars);

        if (found) {
            collected.push(found);
        }

        return collected;
    }, []);
}

//------------------------------------------------------------------------------
// Rule Definition
Expand Down Expand Up @@ -181,6 +220,62 @@ module.exports = {
const sourceCode = context.sourceCode;
const parser = new RegExpParser();

/**
 * Builds a precise `loc` covering the given characters, when their offsets
 * can be mapped directly onto the source text of the node.
 * @param {Character[]} chars Individual characters being reported on.
 * @param {Node} node Parent string node to report within.
 * @returns {Object | null} `{ start, end }` loc for context.report, or `null` when it is not directly calculable.
 * @see https://github.com/eslint/eslint/pull/17515
 */
function generateReportLocation(chars, node) {
    let isDirectlyMappable;

    if (node.type === "TemplateLiteral") {

        // Only expression-less templates whose raw text equals the cooked text.
        isDirectlyMappable = !node.expressions.length &&
            node.quasis[0].value.raw === node.quasis[0].value.cooked;
    } else if (node.type === "Literal") {

        // String literals qualify only if the value equals the raw text minus its first and last characters.
        // Literals whose value is not a string skip this check.
        isDirectlyMappable = typeof node.value !== "string" ||
            node.value === node.raw.slice(1, -1);
    } else {
        isDirectlyMappable = false;
    }

    if (!isDirectlyMappable) {
        return null;
    }

    // Character offsets are counted from one position past the start of the node.
    const contentStart = node.range[0] + 1;
    const start = sourceCode.getLocFromIndex(contentStart + chars[0].start);
    const end = sourceCode.getLocFromIndex(contentStart + chars.at(-1).end);

    return { start, end };
}

/**
 * Resolves the report locations for a list of matches.
 * @param {Character[][]} matches Characters that should trigger a report.
 * @param {Node} node The node to report.
 * @returns {Object[]} One granular loc per match, or a single-element array holding `node.loc` as soon as any match cannot be given a granular loc.
 */
function getNodeReportLocations(matches, node) {
    const granularLocs = [];

    // `every` stops at the first match without a granular loc, so later matches are not examined.
    const everyMatchLocated = matches.every(chars => {
        const loc = generateReportLocation(chars, node);

        if (loc) {
            granularLocs.push(loc);
        }

        return Boolean(loc);
    });

    // If a report can't match to a range, fall back to the whole node instead of reporting any others.
    return everyMatchLocated ? granularLocs : [node.loc];
}

/**
* Verify a given regular expression.
* @param {Node} node The node to report.
Expand Down Expand Up @@ -208,21 +303,26 @@ module.exports = {
return;
}

const foundKinds = new Set();
const foundKindMatches = new Map();

visitRegExpAST(patternNode, {
onCharacterClassEnter(ccNode) {
for (const chars of iterateCharacterSequence(ccNode.elements)) {
for (const kind of kinds) {
if (hasCharacterSequence[kind](chars)) {
foundKinds.add(kind);
const matches = accumulate(chars, characterSequenceIndexFilters[kind]);

if (foundKindMatches.has(kind)) {
foundKindMatches.get(kind).push(...matches);
} else {
foundKindMatches.set(kind, matches);
}

}
}
}
});

for (const kind of foundKinds) {
for (const [kind, matches] of foundKindMatches) {
let suggest;

if (kind === "surrogatePairWithoutUFlag") {
Expand All @@ -232,11 +332,27 @@ module.exports = {
}];
}

context.report({
node,
messageId: kind,
suggest
});
const locs = getNodeReportLocations(matches, node);

// Grapheme zero-width joiners (e.g. in 👨‍👩‍👦) visually show as one emoji
if (kind === "zwj" && locs.length > 1) {
context.report({
loc: {
start: locs[0].start,
end: locs[1].end
},
messageId: kind,
suggest
});
} else {
for (const loc of locs) {
context.report({
loc,
messageId: kind,
suggest
});
}
}
}
}

Expand Down Expand Up @@ -267,7 +383,7 @@ module.exports = {
const flags = getStringIfConstant(flagsNode, scope);

if (typeof pattern === "string") {
verify(refNode, pattern, flags || "", fixer => {
verify(patternNode, pattern, flags || "", fixer => {

if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
return null;
Expand Down
Loading

0 comments on commit 287c4b7

Please sign in to comment.