From 27e8a62e2b33148a0a1cb37860c7f7821a528b47 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 13 Nov 2016 23:01:09 -0800 Subject: [PATCH 1/8] Support JavaScript code blocks set apart by triple backticks (``` ... ```) --- lib/coffee-script/lexer.js | 16 ++++++++++++---- src/lexer.coffee | 13 ++++++++++--- test/javascript_literals.coffee | 29 +++++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index e9984ee013..50c7913e78 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -336,12 +336,20 @@ }; Lexer.prototype.jsToken = function() { - var match, script; + var here, js, length, match, script; if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) { return 0; } - this.token('JS', (script = match[0]).slice(1, -1), 0, script.length); - return script.length; + js = match[0], here = match[1]; + if (here != null) { + script = here; + length = here.length + 6; + } else { + script = js.slice(1, -1); + length = js.length; + } + this.token('JS', script, 0, length); + return length; }; Lexer.prototype.regexToken = function() { @@ -1009,7 +1017,7 @@ MULTI_DENT = /^(?:\n[^\n\S]*)+/; - JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/; + JSTOKEN = /^```([\s\S]*?)```|^`[^\\`]*(?:\\.[^\\`]*)*`/; STRING_START = /^(?:'''|"""|'|")/; diff --git a/src/lexer.coffee b/src/lexer.coffee index 1613a0c715..9126f1ac9a 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -293,8 +293,15 @@ exports.Lexer = class Lexer # Matches JavaScript interpolated directly into the source via backticks. jsToken: -> return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk - @token 'JS', (script = match[0])[1...-1], 0, script.length - script.length + [js, here] = match + if here? + script = here + length = here.length + 6 # 6 is the length of the six ` characters + else + script = js[1...-1] + length = js.length + @token 'JS', script, 0, length + length # Matches regular expression literals, as well as multiline extended ones. # Lexing regular expressions is difficult to distinguish from division, so we @@ -900,7 +907,7 @@ CODE = /^[-=]>/ MULTI_DENT = /^(?:\n[^\n\S]*)+/ -JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/ +JSTOKEN = /^```([\s\S]*?)```|^`[^\\`]*(?:\\.[^\\`]*)*`/ # String-matching-regexes. STRING_START = /^(?:'''|"""|'|")/ diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index 91a9efddeb..a4a08ede7e 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -4,7 +4,28 @@ # TODO: refactor javascript literal tests # TODO: add indexing and method invocation tests: `[1]`[0] is 1, `function(){}`.call() -eq '\\`', ` - // Inline JS - "\\\`" -` +test "inline JavaScript is evaluated", -> + eq '\\`', ` + // Inline JS + "\\\`" + ` + +test "block inline JavaScript is evaluated", -> + ``` + var a = 1; + var b = 2; + ``` + c = 3 + ```var d = 4;``` + eq a + b + c + d, 10 + +test "block inline JavaScript containing backticks", -> + ``` + // This is a comment with `backticks` + var a = 42; + var b = `foo ${'bar'}`; + var c = 3; + var d = 'foo`bar`'; + ``` + eq a + c, 45 + eq d, 'foo`bar`' From 107d8afd36684334950f933ca93539277abdf5b6 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 13 Nov 2016 23:14:11 -0800 Subject: [PATCH 2/8] Add test for escaped backticks --- test/javascript_literals.coffee | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index a4a08ede7e..4e67095db4 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -10,6 +10,10 @@ test "inline JavaScript is evaluated", -> "\\\`" ` +test "escaped backticks are output correctly", -> + `var a = 'foo\`bar';` + eq a, 'foo`bar' + test "block inline JavaScript is evaluated", -> ``` var a = 1; From edd36b5cdad4d42c5f902debc6e080c6c664ed8f Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 13 Nov 2016 23:29:44 -0800 Subject: [PATCH 3/8] =?UTF-8?q?Remove=20TODOs=20for=20things=20we=E2=80=99?= =?UTF-8?q?re=20never=20going=20to=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/javascript_literals.coffee | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index 4e67095db4..12b3f21526 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -1,9 +1,6 @@ -# Javascript Literals +# JavaScript Literals # ------------------- -# TODO: refactor javascript literal tests -# TODO: add indexing and method invocation tests: `[1]`[0] is 1, `function(){}`.call() - test "inline JavaScript is evaluated", -> eq '\\`', ` // Inline JS From b9dd31086e1a38b0cabf7246c76a62bcaeb118bd Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Mon, 14 Nov 2016 16:41:30 -0800 Subject: [PATCH 4/8] Convert escaped backticks to backticks; update tests --- lib/coffee-script/lexer.js | 1 + src/lexer.coffee | 1 + test/javascript_literals.coffee | 12 ++++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 50c7913e78..c3535458a2 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -348,6 +348,7 @@ script = js.slice(1, -1); length = js.length; } + script = script.replace(/\\`/g, '`'); this.token('JS', script, 0, length); return length; }; diff --git a/src/lexer.coffee b/src/lexer.coffee index 9126f1ac9a..4c17c1b3ed 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -300,6 +300,7 @@ exports.Lexer = class Lexer else script = js[1...-1] length = js.length + script = script.replace /\\`/g, '`' # Convert escaped backticks to backticks @token 'JS', script, 0, length length diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index 12b3f21526..154443bf43 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -8,8 +8,15 @@ test "inline JavaScript is evaluated", -> ` test "escaped backticks are output correctly", -> - `var a = 'foo\`bar';` - eq a, 'foo`bar' + `var a = \`2 + 2 = ${4}\`` + eq a, '2 + 2 = 4' + +test "backslashes before a newline don’t break JavaScript blocks", -> + `var a = \`To be, or not\\ + to be.\`` + eq a, ''' + To be, or not\\ + to be.''' test "block inline JavaScript is evaluated", -> ``` @@ -29,4 +36,5 @@ test "block inline JavaScript containing backticks", -> var d = 'foo`bar`'; ``` eq a + c, 45 + eq b, 'foo bar' eq d, 'foo`bar`' From 3200259430b1d3be178f5943d9ba0f0b59307d34 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Mon, 14 Nov 2016 17:33:20 -0800 Subject: [PATCH 5/8] Block inline JavaScript can end with an escaped backtick character --- lib/coffee-script/lexer.js | 18 ++++++------------ src/lexer.coffee | 15 +++++---------- test/javascript_literals.coffee | 7 +++++++ 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index c3535458a2..831f8cf161 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -336,21 +336,15 @@ }; Lexer.prototype.jsToken = function() { - var here, js, length, match, script; + var js, match, script; if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) { return 0; } - js = match[0], here = match[1]; - if (here != null) { - script = here; - length = here.length + 6; - } else { - script = js.slice(1, -1); - length = js.length; - } + js = match[0]; + script = js.slice(0, 3) === '```' ? js.slice(3, -3) : js.slice(1, -1); script = script.replace(/\\`/g, '`'); - this.token('JS', script, 0, length); - return length; + this.token('JS', script, 0, js.length); + return js.length; }; Lexer.prototype.regexToken = function() { @@ -1018,7 +1012,7 @@ MULTI_DENT = /^(?:\n[^\n\S]*)+/; - JSTOKEN = /^```([\s\S]*?)```|^`[^\\`]*(?:\\.[^\\`]*)*`/; + JSTOKEN = /^```([\s\S]*?)(?:\\`(```)|```)|^`[^\\`]*(?:\\.[^\\`]*)*`/; STRING_START = /^(?:'''|"""|'|")/; diff --git a/src/lexer.coffee b/src/lexer.coffee index 4c17c1b3ed..d55bcc4ab4 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -293,16 +293,11 @@ exports.Lexer = class Lexer # Matches JavaScript interpolated directly into the source via backticks. jsToken: -> return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk - [js, here] = match - if here? - script = here - length = here.length + 6 # 6 is the length of the six ` characters - else - script = js[1...-1] - length = js.length + [js] = match + script = if js[0..2] is '```' then js[3...-3] else js[1...-1] script = script.replace /\\`/g, '`' # Convert escaped backticks to backticks - @token 'JS', script, 0, length - length + @token 'JS', script, 0, js.length + js.length # Matches regular expression literals, as well as multiline extended ones. # Lexing regular expressions is difficult to distinguish from division, so we @@ -908,7 +903,7 @@ CODE = /^[-=]>/ MULTI_DENT = /^(?:\n[^\n\S]*)+/ -JSTOKEN = /^```([\s\S]*?)```|^`[^\\`]*(?:\\.[^\\`]*)*`/ +JSTOKEN = /^```([\s\S]*?)(?:\\`(```)|```)|^`[^\\`]*(?:\\.[^\\`]*)*`/ # String-matching-regexes. STRING_START = /^(?:'''|"""|'|")/ diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index 154443bf43..c6a95ce86e 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -38,3 +38,10 @@ test "block inline JavaScript containing backticks", -> eq a + c, 45 eq b, 'foo bar' eq d, 'foo`bar`' + +test "block JavaScript can end with an escaped backtick character", -> + ```var a = \`hello\```` + ``` + var b = \`world${'!'}\```` + eq a, 'hello' + eq b, 'world!' From 0d5449f9246f8afddd979ada1cbbfc8ee3332eef Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 15 Nov 2016 14:39:54 -0800 Subject: [PATCH 6/8] Updated JavaScript token regexes per @lydell --- lib/coffee-script/lexer.js | 8 +++++--- src/lexer.coffee | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 831f8cf161..bdbb308809 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 1.11.1 (function() { - var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVALID_ESCAPE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isUnassignable, key, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError, + var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVALID_ESCAPE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isUnassignable, key, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError, indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }, slice = [].slice; @@ -337,7 +337,7 @@ Lexer.prototype.jsToken = function() { var js, match, script; - if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) { + if (!(this.chunk.charAt(0) === '`' && (match = HERE_JSTOKEN.exec(this.chunk) || JSTOKEN.exec(this.chunk)))) { return 0; } js = match[0]; @@ -1012,7 +1012,9 @@ MULTI_DENT = /^(?:\n[^\n\S]*)+/; - JSTOKEN = /^```([\s\S]*?)(?:\\`(```)|```)|^`[^\\`]*(?:\\.[^\\`]*)*`/; + JSTOKEN = /^`(?!``)((?:[^`\\]|\\[\s\S])*)`/; + + HERE_JSTOKEN = /^```((?:[^`\\]|\\[\s\S]|`(?!``))*)```/; STRING_START = /^(?:'''|"""|'|")/; diff --git a/src/lexer.coffee b/src/lexer.coffee index d55bcc4ab4..53c9e19936 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -292,7 +292,8 @@ exports.Lexer = class Lexer # Matches JavaScript interpolated directly into the source via backticks. jsToken: -> - return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk + return 0 unless @chunk.charAt(0) is '`' and + (match = HERE_JSTOKEN.exec(@chunk) or JSTOKEN.exec(@chunk)) [js] = match script = if js[0..2] is '```' then js[3...-3] else js[1...-1] script = script.replace /\\`/g, '`' # Convert escaped backticks to backticks @@ -903,7 +904,8 @@ CODE = /^[-=]>/ MULTI_DENT = /^(?:\n[^\n\S]*)+/ -JSTOKEN = /^```([\s\S]*?)(?:\\`(```)|```)|^`[^\\`]*(?:\\.[^\\`]*)*`/ +JSTOKEN = ///^ `(?!``) ((?: [^`\\] | \\[\s\S] )*) ` /// +HERE_JSTOKEN = ///^ ``` ((?: [^`\\] | \\[\s\S] | `(?!``) )*) ``` /// # String-matching-regexes. STRING_START = /^(?:'''|"""|'|")/ From 76896200ee0100ccce887db228e97b1a99bb086a Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Wed, 16 Nov 2016 22:38:36 -0800 Subject: [PATCH 7/8] In JavaScript blocks, escape backslashes when they immediately precede backticks; additional tests --- lib/coffee-script/lexer.js | 12 ++++++------ src/lexer.coffee | 13 ++++++++----- test/javascript_literals.coffee | 22 +++++++++++++++++++++- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index bdbb308809..c6309bc0c9 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -336,15 +336,15 @@ }; Lexer.prototype.jsToken = function() { - var js, match, script; + var match, script; if (!(this.chunk.charAt(0) === '`' && (match = HERE_JSTOKEN.exec(this.chunk) || JSTOKEN.exec(this.chunk)))) { return 0; } - js = match[0]; - script = js.slice(0, 3) === '```' ? js.slice(3, -3) : js.slice(1, -1); - script = script.replace(/\\`/g, '`'); - this.token('JS', script, 0, js.length); - return js.length; + script = match[1].replace(/\\+(`|$)/g, function(string) { + return string.slice(-Math.ceil(string.length / 2)); + }); + this.token('JS', script, 0, match[0].length); + return match[0].length; }; Lexer.prototype.regexToken = function() { diff --git a/src/lexer.coffee b/src/lexer.coffee index 53c9e19936..3c3962d8cf 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -294,11 +294,14 @@ exports.Lexer = class Lexer jsToken: -> return 0 unless @chunk.charAt(0) is '`' and (match = HERE_JSTOKEN.exec(@chunk) or JSTOKEN.exec(@chunk)) - [js] = match - script = if js[0..2] is '```' then js[3...-3] else js[1...-1] - script = script.replace /\\`/g, '`' # Convert escaped backticks to backticks - @token 'JS', script, 0, js.length - js.length + # Convert escaped backticks to backticks, and escaped backslashes + # just before escaped backticks to backslashes + script = match[1].replace /\\+(`|$)/g, (string) -> + # `string` is always a value like '\`', '\\\`', '\\\\\`', etc. + # By reducing it to its latter half, we turn '\`' to '`', '\\\`' to '\`', etc. + string[-Math.ceil(string.length / 2)..] + @token 'JS', script, 0, match[0].length + match[0].length # Matches regular expression literals, as well as multiline extended ones. # Lexing regular expressions is difficult to distinguish from division, so we diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index c6a95ce86e..bed05cec67 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -4,7 +4,7 @@ test "inline JavaScript is evaluated", -> eq '\\`', ` // Inline JS - "\\\`" + "\\\\\`" ` test "escaped backticks are output correctly", -> @@ -45,3 +45,23 @@ test "block JavaScript can end with an escaped backtick character", -> var b = \`world${'!'}\```` eq a, 'hello' eq b, 'world!' + +test "escaped JavaScript blocks speed round", -> + # The following has escaped backslashes because they’re required in strings, but the intent is this: + # `hello` → hello; + # `\`hello\`` → `hello`; + # `\`Escaping backticks in JS: \\\`hello\\\`\`` → `Escaping backticks in JS: \`hello\``; + # `Single backslash: \ ` → Single backslash: \ ; + # `Double backslash: \\ ` → Double backslash: \\ ; + # `Single backslash at EOS: \\` → Single backslash at EOS: \; + # `Double backslash at EOS: \\\\` → Double backslash at EOS: \\; + for [input, output] in [ + ['`hello`', 'hello;'] + ['`\\`hello\\``', '`hello`;'] + ['`\\`Escaping backticks in JS: \\\\\\`hello\\\\\\`\\``', '`Escaping backticks in JS: \\`hello\\``;'] + ['`Single backslash: \\ `', 'Single backslash: \\ ;'] + ['`Double backslash: \\\\ `', 'Double backslash: \\\\ ;'] + ['`Single backslash at EOS: \\\\`', 'Single backslash at EOS: \\;'] + ['`Double backslash at EOS: \\\\\\\\`', 'Double backslash at EOS: \\\\;'] + ] + eq CoffeeScript.compile(input, bare: yes), "#{output}\n\n" From 9945e683a725e0f5a308c950d8764a5f7ea58360 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Thu, 17 Nov 2016 00:14:30 -0800 Subject: [PATCH 8/8] =?UTF-8?q?Test=20that=20we=20don=E2=80=99t=20break=20?= =?UTF-8?q?backslash=20escaping=20in=20JavaScript=20literals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/javascript_literals.coffee | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/javascript_literals.coffee b/test/javascript_literals.coffee index bed05cec67..45c68535c1 100644 --- a/test/javascript_literals.coffee +++ b/test/javascript_literals.coffee @@ -46,6 +46,9 @@ test "block JavaScript can end with an escaped backtick character", -> eq a, 'hello' eq b, 'world!' +test "JavaScript block only escapes backslashes followed by backticks", -> + eq `'\\\n'`, '\\\n' + test "escaped JavaScript blocks speed round", -> # The following has escaped backslashes because they’re required in strings, but the intent is this: # `hello` → hello;