Skip to content

Commit

Permalink
Follow regexp validation as per tc39/ecma262#1869
Browse files Browse the repository at this point in the history
  • Loading branch information
mysticatea authored Apr 2, 2020
1 parent 078e2cc commit f1c3592
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 35 deletions.
73 changes: 38 additions & 35 deletions acorn/src/regexp.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,49 +40,49 @@ export class RegExpValidationState {

// If the u flag is given or `forceU` is true, this returns the code point at the index (it combines a surrogate pair).
// Otherwise, this returns the code unit at the index (can be a part of a surrogate pair).
at(i) {
at(i, forceU = false) {
const s = this.source
const l = s.length
if (i >= l) {
return -1
}
const c = s.charCodeAt(i)
if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
return c
}
const next = s.charCodeAt(i + 1)
return next >= 0xDC00 && next <= 0xDFFF ? (c << 10) + next - 0x35FDC00 : c
}

nextIndex(i) {
nextIndex(i, forceU = false) {
const s = this.source
const l = s.length
if (i >= l) {
return l
}
let c = s.charCodeAt(i), next
if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
(next = s.charCodeAt(i + 1)) < 0xDC00 || next > 0xDFFF) {
return i + 1
}
return i + 2
}

current() {
return this.at(this.pos)
current(forceU = false) {
return this.at(this.pos, forceU)
}

lookahead() {
return this.at(this.nextIndex(this.pos))
lookahead(forceU = false) {
return this.at(this.nextIndex(this.pos, forceU), forceU)
}

advance() {
this.pos = this.nextIndex(this.pos)
advance(forceU = false) {
this.pos = this.nextIndex(this.pos, forceU)
}

eat(ch) {
if (this.current() === ch) {
this.advance()
eat(ch, forceU = false) {
if (this.current(forceU) === ch) {
this.advance(forceU)
return true
}
return false
Expand Down Expand Up @@ -418,9 +418,9 @@ pp.regexp_eatExtendedPatternCharacter = function(state) {
return false
}

// GroupSpecifier[U] ::
// GroupSpecifier ::
// [empty]
// `?` GroupName[?U]
// `?` GroupName
pp.regexp_groupSpecifier = function(state) {
if (state.eat(0x3F /* ? */)) {
if (this.regexp_eatGroupName(state)) {
Expand All @@ -434,8 +434,8 @@ pp.regexp_groupSpecifier = function(state) {
}
}

// GroupName[U] ::
// `<` RegExpIdentifierName[?U] `>`
// GroupName ::
// `<` RegExpIdentifierName `>`
// Note: this updates `state.lastStringValue` property with the eaten name.
pp.regexp_eatGroupName = function(state) {
state.lastStringValue = ""
Expand All @@ -448,9 +448,9 @@ pp.regexp_eatGroupName = function(state) {
return false
}

// RegExpIdentifierName[U] ::
// RegExpIdentifierStart[?U]
// RegExpIdentifierName[?U] RegExpIdentifierPart[?U]
// RegExpIdentifierName ::
// RegExpIdentifierStart
// RegExpIdentifierName RegExpIdentifierPart
// Note: this updates `state.lastStringValue` property with the eaten name.
pp.regexp_eatRegExpIdentifierName = function(state) {
state.lastStringValue = ""
Expand All @@ -464,17 +464,18 @@ pp.regexp_eatRegExpIdentifierName = function(state) {
return false
}

// RegExpIdentifierStart[U] ::
// RegExpIdentifierStart ::
// UnicodeIDStart
// `$`
// `_`
// `\` RegExpUnicodeEscapeSequence[?U]
// `\` RegExpUnicodeEscapeSequence[+U]
pp.regexp_eatRegExpIdentifierStart = function(state) {
const start = state.pos
let ch = state.current()
state.advance()
const forceU = this.options.ecmaVersion >= 11
let ch = state.current(forceU)
state.advance(forceU)

if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
ch = state.lastIntValue
}
if (isRegExpIdentifierStart(ch)) {
Expand All @@ -489,19 +490,20 @@ function isRegExpIdentifierStart(ch) {
return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */
}

// RegExpIdentifierPart[U] ::
// RegExpIdentifierPart ::
// UnicodeIDContinue
// `$`
// `_`
// `\` RegExpUnicodeEscapeSequence[?U]
// `\` RegExpUnicodeEscapeSequence[+U]
// <ZWNJ>
// <ZWJ>
pp.regexp_eatRegExpIdentifierPart = function(state) {
const start = state.pos
let ch = state.current()
state.advance()
const forceU = this.options.ecmaVersion >= 11
let ch = state.current(forceU)
state.advance(forceU)

if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
ch = state.lastIntValue
}
if (isRegExpIdentifierPart(ch)) {
Expand Down Expand Up @@ -571,7 +573,7 @@ pp.regexp_eatCharacterEscape = function(state) {
this.regexp_eatCControlLetter(state) ||
this.regexp_eatZero(state) ||
this.regexp_eatHexEscapeSequence(state) ||
this.regexp_eatRegExpUnicodeEscapeSequence(state) ||
this.regexp_eatRegExpUnicodeEscapeSequence(state, false) ||
(!state.switchU && this.regexp_eatLegacyOctalEscapeSequence(state)) ||
this.regexp_eatIdentityEscape(state)
)
Expand Down Expand Up @@ -644,13 +646,14 @@ function isControlLetter(ch) {
}

// https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence
pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
pp.regexp_eatRegExpUnicodeEscapeSequence = function(state, forceU = false) {
const start = state.pos
const switchU = forceU || state.switchU

if (state.eat(0x75 /* u */)) {
if (this.regexp_eatFixedHexDigits(state, 4)) {
const lead = state.lastIntValue
if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) {
if (switchU && lead >= 0xD800 && lead <= 0xDBFF) {
const leadSurrogateEnd = state.pos
if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.regexp_eatFixedHexDigits(state, 4)) {
const trail = state.lastIntValue
Expand All @@ -665,15 +668,15 @@ pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
return true
}
if (
state.switchU &&
switchU &&
state.eat(0x7B /* { */) &&
this.regexp_eatHexDigits(state) &&
state.eat(0x7D /* } */) &&
isValidUnicode(state.lastIntValue)
) {
return true
}
if (state.switchU) {
if (switchU) {
state.raise("Invalid unicode escape")
}
state.pos = start
Expand Down
10 changes: 10 additions & 0 deletions bin/test262.whitelist
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
language/literals/regexp/named-groups/invalid-non-id-continue-groupspecifier.js (default)
language/literals/regexp/named-groups/invalid-non-id-continue-groupspecifier.js (strict mode)
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-3.js (default)
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-3.js (strict mode)
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-6.js (default)
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-6.js (strict mode)
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier.js (default)
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier.js (strict mode)
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier-2.js (default)
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier-2.js (strict mode)
1 change: 1 addition & 0 deletions test/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
require("./tests-async-iteration.js");
require("./tests-regexp.js");
require("./tests-regexp-2018.js");
require("./tests-regexp-2020.js");
require("./tests-json-superset.js");
require("./tests-optional-catch-binding.js");
require("./tests-bigint.js");
Expand Down
21 changes: 21 additions & 0 deletions test/tests-regexp-2020.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Named capture group name validation, before and after
// https://github.com/tc39/ecma262/pull/1869.
//
// Each group below writes the same group name, U+1D49C (MATHEMATICAL SCRIPT
// CAPITAL A), in a different form and checks all four combinations of
// { no flag, `u` flag } x { ecmaVersion 2019, ecmaVersion 2020 }. Only the
// combination "no `u` flag, ecmaVersion 2019" is expected to be rejected.
if (typeof exports != "undefined") {
  var test = require("./driver.js").test
  var testFail = require("./driver.js").testFail
}

// https://github.com/tc39/ecma262/pull/1869
// Group name written as an escaped surrogate pair: \ud835\udc9c
testFail("/(?<\\ud835\\udc9c>.)/", "Invalid regular expression: /(?<\\ud835\\udc9c>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
test("/(?<\\ud835\\udc9c>.)/", {}, { ecmaVersion: 2020 })
test("/(?<\\ud835\\udc9c>.)/u", {}, { ecmaVersion: 2019 })
test("/(?<\\ud835\\udc9c>.)/u", {}, { ecmaVersion: 2020 })

// Group name written as a code point escape: \u{1d49c}
testFail("/(?<\\u{1d49c}>.)/", "Invalid regular expression: /(?<\\u{1d49c}>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
test("/(?<\\u{1d49c}>.)/", {}, { ecmaVersion: 2020 })
test("/(?<\\u{1d49c}>.)/u", {}, { ecmaVersion: 2019 })
test("/(?<\\u{1d49c}>.)/u", {}, { ecmaVersion: 2020 })

// Group name written as the literal character U+1D49C. This file must be
// stored as UTF-8: if the character is mis-decoded, the name no longer
// matches the escaped forms above and these cases stop testing the same thing.
testFail("/(?<𝒜>.)/", "Invalid regular expression: /(?<𝒜>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
test("/(?<𝒜>.)/", {}, { ecmaVersion: 2020 })
test("/(?<𝒜>.)/u", {}, { ecmaVersion: 2019 })
test("/(?<𝒜>.)/u", {}, { ecmaVersion: 2020 })

0 comments on commit f1c3592

Please sign in to comment.