diff --git a/CHANGELOG.md b/CHANGELOG.md
index c78497e6..0916449b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## [Unreleased]
+
+### Fixed
+- Fixed a regression in the Python language implementation for regexes [#119](https://github.com/cucumber/language-service/pull/119)
+
 ### Added
 - Added support for JavaScript - [#42](https://github.com/cucumber/language-service/issues/42), [#115](https://github.com/cucumber/language-service/pull/115), [#120](https://github.com/cucumber/language-service/pull/120)
 
diff --git a/src/language/pythonLanguage.ts b/src/language/pythonLanguage.ts
index e8d86db5..7bb45398 100644
--- a/src/language/pythonLanguage.ts
+++ b/src/language/pythonLanguage.ts
@@ -8,7 +8,7 @@ export const pythonLanguage: Language = {
       case 'string': {
         return stringLiteral(node)
       }
-      case 'concatednated_string': {
+      case 'concatenated_string': {
         return stringLiteral(node)
       }
       case 'identifier': {
@@ -20,18 +20,15 @@ export const pythonLanguage: Language = {
     }
   },
   toParameterTypeRegExps(node: TreeSitterSyntaxNode) {
-    return RegExp(cleanRegex(stringLiteral(node)))
+    return RegExp(cleanRegExp(stringLiteral(node)))
   },
   toStepDefinitionExpression(node: TreeSitterSyntaxNode): StringOrRegExp {
-    // this removes the head and tail apostrophes
-    // remove python named capture groups.
+    // This removes the head and tail apostrophes.
     // TODO: This should be temporary. Python supports
     // a wider array of regex features than javascript
     // a singular way of communicating regex consistent
     // across languages is necessary
-    return isRegex(node.text.slice(1, -1))
-      ? RegExp(cleanRegex(node.text.slice(1, -1).split('?P').join('')))
-      : node.text.slice(1, -1)
+    return toStringOrRegExp(node.text)
   },
   defineParameterTypeQueries: [
     `(call
@@ -56,13 +53,13 @@ export const pythonLanguage: Language = {
   defineStepDefinitionQueries: [
     `
     (decorated_definition
-        (decorator
-          (call
-            function: (identifier) @method
-            arguments: (argument_list (string) @expression)
-          )
+      (decorator
+        (call
+          function: (identifier) @method
+          arguments: (argument_list (string) @expression)
         )
-        (#match? @method "(given|when|then)")
+      )
+      (#match? @method "(given|when|then)")
     ) @root
 `,
   ],
@@ -86,25 +83,60 @@ export const pythonLanguage: Language = {
     # Please convert to use regular expressions, as Behave does not currently support Cucumber Expressions`,
 }
 
-function cleanRegex(regexString: string) {
-  const startsWith = regexString[0]
+/**
+ * Drops the first and last character of a pattern that starts with `/`;
+ * any other string is returned unchanged.
+ */
+function cleanRegExp(regExpString: string): string {
+  const startsWith = regExpString[0]
   switch (startsWith) {
     case '/':
-      return regexString.slice(1, -1)
+      return regExpString.slice(1, -1)
     default:
-      return regexString
+      return regExpString
   }
 }
+
+/**
+ * Converts the raw text of a Python string node into a step definition
+ * expression.
+ *
+ * Python named groups `(?P<name>...)` and backreferences `(?P=name)` are
+ * rewritten to the JavaScript forms `(?<name>...)` and `\k<name>`. Merely
+ * deleting `?P` would leave `(<name>...)`, which matches a literal `<name>`.
+ *
+ * @param step - node text, including its surrounding quote characters
+ * @returns a RegExp when the text looks like a regular expression, otherwise
+ *   the text without its quotes
+ */
+export function toStringOrRegExp(step: string): StringOrRegExp {
+  const unquoted = step.slice(1, -1)
+  if (!isRegExp(unquoted)) {
+    return unquoted
+  }
+  const source = unquoted
+    .replace(/\(\?P</g, '(?<')
+    .replace(/\(\?P=(\w+)\)/g, '\\k<$1>')
+  return RegExp(cleanRegExp(source))
+}
 
-function stringLiteral(node: TreeSitterSyntaxNode) {
+/** Returns the node text without its quotes (and without an `f` prefix). */
+function stringLiteral(node: TreeSitterSyntaxNode): string {
   const isFString = node.text.startsWith('f')
   const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1)
   return cleanWord
 }
 
-function isRegex(cleanWord: string) {
+/**
+ * Heuristic check for whether an unquoted Python string is a regular
+ * expression: it starts with `/`, contains `?P` (named-group syntax), or
+ * contains one of the characters `( ) . * \ |`.
+ */
+export function isRegExp(cleanWord: string): boolean {
   const startsWithSlash = cleanWord.startsWith('/')
   const namedGroupMatch = /\?P/
+  const specialCharsMatch = /\(|\)|\.|\*|\\|\|/
   const containsNamedGroups = namedGroupMatch.test(cleanWord)
-  return startsWithSlash || containsNamedGroups
+  const containsSpecialChars = specialCharsMatch.test(cleanWord)
+  return startsWithSlash || containsNamedGroups || containsSpecialChars
 }
diff --git a/test/language/pythonLanguage.test.ts b/test/language/pythonLanguage.test.ts
new file mode 100644
index 00000000..fd8c67d8
--- /dev/null
+++ b/test/language/pythonLanguage.test.ts
@@ -0,0 +1,26 @@
+import assert from 'assert'
+
+import { toStringOrRegExp } from '../../src/language/pythonLanguage.js'
+
+describe('pythonLanguage', () => {
+  it('should identify and return regexes correctly', () => {
+    // NOTE: these mimic the raw text tree-sitter reports for a Python string
+    // node, i.e. the surrounding quote characters are still present.
+    const regexes = ['"Something (.*)"', '"Catch them digits \\d+"']
+    regexes.forEach(function (regex) {
+      assert(toStringOrRegExp(regex) instanceof RegExp)
+    })
+  })
+  it('should translate Python named groups to JavaScript named groups', () => {
+    const expression = toStringOrRegExp('"I have (?P<count>\\d+) cukes"')
+    assert(expression instanceof RegExp)
+    assert.strictEqual(expression.source, 'I have (?<count>\\d+) cukes')
+    assert(expression.test('I have 42 cukes'))
+  })
+  it('should identify normal strings and just return a string', () => {
+    const nonregexes = ['"test"']
+    nonregexes.forEach(function (nonregex) {
+      assert.strictEqual(toStringOrRegExp(nonregex), 'test')
+    })
+  })
+})