Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/regex regression #119

Merged
merged 10 commits into from
Dec 8, 2022
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Fixed
- Fixed a regression in the python language implementation for regexes [#119](https://github.com/cucumber/language-service/pull/119)

### Added
- Added support for JavaScript - [#42](https://github.com/cucumber/language-service/issues/42), [#115](https://github.com/cucumber/language-service/pull/115), [#120](https://github.com/cucumber/language-service/pull/120)

Expand Down
44 changes: 24 additions & 20 deletions src/language/pythonLanguage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export const pythonLanguage: Language = {
case 'string': {
return stringLiteral(node)
}
case 'concatednated_string': {
case 'concatenated_string': {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fixed this typo and nothing broke, so I assume this path isn't tested.

return stringLiteral(node)
}
case 'identifier': {
Expand All @@ -20,18 +20,15 @@ export const pythonLanguage: Language = {
}
},
toParameterTypeRegExps(node: TreeSitterSyntaxNode) {
return RegExp(cleanRegex(stringLiteral(node)))
return RegExp(cleanRegExp(stringLiteral(node)))
},
toStepDefinitionExpression(node: TreeSitterSyntaxNode): StringOrRegExp {
// this removes the head and tail apostrophes
// remove python named capture groups.
// This removes the head and tail apostrophes.
// TODO: This should be temporary. Python supports
// a wider array of regex features than javascript
// a singular way of communicating regex consistent
// across languages is necessary
return isRegex(node.text.slice(1, -1))
? RegExp(cleanRegex(node.text.slice(1, -1).split('?P').join('')))
: node.text.slice(1, -1)
return toStringOrRegExp(node.text)
},
defineParameterTypeQueries: [
`(call
Expand All @@ -56,13 +53,13 @@ export const pythonLanguage: Language = {
defineStepDefinitionQueries: [
`
(decorated_definition
(decorator
(call
function: (identifier) @method
arguments: (argument_list (string) @expression)
)
(decorator
(call
function: (identifier) @method
arguments: (argument_list (string) @expression)
)
(#match? @method "(given|when|then)")
)
(#match? @method "(given|when|then)")
) @root
`,
],
Expand All @@ -86,25 +83,32 @@ export const pythonLanguage: Language = {
# Please convert to use regular expressions, as Behave does not currently support Cucumber Expressions`,
}

function cleanRegex(regexString: string) {
const startsWith = regexString[0]
function cleanRegExp(regExpString: string): string {
const startsWith = regExpString[0]
switch (startsWith) {
case '/':
return regexString.slice(1, -1)
return regExpString.slice(1, -1)
default:
return regexString
return regExpString
}
}
export function toStringOrRegExp(step: string): StringOrRegExp {
return isRegExp(step.slice(1, -1))
? RegExp(cleanRegExp(step.slice(1, -1).split('?P').join('')))
: step.slice(1, -1)
}

function stringLiteral(node: TreeSitterSyntaxNode) {
function stringLiteral(node: TreeSitterSyntaxNode): string {
const isFString = node.text.startsWith('f')
const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1)
return cleanWord
}

function isRegex(cleanWord: string) {
export function isRegExp(cleanWord: string): boolean {
const startsWithSlash = cleanWord.startsWith('/')
const namedGroupMatch = /\?P/
const specialCharsMatch = /\(|\)|\.|\*|\\|\|/
const containsNamedGroups = namedGroupMatch.test(cleanWord)
return startsWithSlash || containsNamedGroups
const containsSpecialChars = specialCharsMatch.test(cleanWord)
return startsWithSlash || containsNamedGroups || containsSpecialChars
}
19 changes: 19 additions & 0 deletions test/language/pythonLanguage.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import assert from 'assert'

import { toStringOrRegExp } from '../../src/language/pythonLanguage.js'

describe('pythonLanguage', () => {
it('should identify and return regexes correctly', () => {
// NOTE these are strings that would look like from tree-sitter
const regexes = ['"Something (.*)"', '"Catch them digits \\d+"']
regexes.forEach(function (regex) {
assert(toStringOrRegExp(regex) instanceof RegExp)
})
})
it('should identify normal strings and just return a string', () => {
const nonregexes = ['"test"']
nonregexes.forEach(function (nonregex) {
assert(toStringOrRegExp(nonregex) == 'test')
})
})
})