From c0d94b1f46d692a30905b9d45e18db1ac4a93024 Mon Sep 17 00:00:00 2001 From: patrick Date: Thu, 6 Jul 2023 14:08:39 +0200 Subject: [PATCH] find templates with html parser and js parser --- package.json | 7 +- src/parse-templates.ts | 408 ++++++++------------------- src/preprocess-embedded-templates.ts | 7 +- yarn.lock | 19 ++ 4 files changed, 146 insertions(+), 295 deletions(-) diff --git a/package.json b/package.json index 3851ccc..5355c16 100644 --- a/package.json +++ b/package.json @@ -38,11 +38,13 @@ "line-column": "^1.0.2", "magic-string": "^0.25.7", "parse-static-imports": "^1.1.0", + "parse5": "^7.1.2", "string.prototype.matchall": "^4.0.6", - "validate-peer-dependencies": "^1.1.0" + "validate-peer-dependencies": "^1.1.0", + "@babel/traverse": "^7.19.6", + "@babel/parser": "^7.19.6" }, "devDependencies": { - "@babel/traverse": "^7.19.6", "@babel/types": "^7.19.4", "@ember/optional-features": "^2.0.0", "@ember/string": "^3.0.1", @@ -51,6 +53,7 @@ "@glimmer/component": "^1.0.3", "@glimmer/syntax": "0.84.3", "@glimmer/tracking": "^1.0.3", + "@types/parse5": "^7.0.0", "@types/jest": "^27.4.1", "@types/line-column": "^1.0.0", "@types/string.prototype.matchall": "^4.0.1", diff --git a/src/parse-templates.ts b/src/parse-templates.ts index 183dea4..c6e5691 100644 --- a/src/parse-templates.ts +++ b/src/parse-templates.ts @@ -1,31 +1,37 @@ -import matchAll from 'string.prototype.matchall'; import parseStaticImports from 'parse-static-imports'; +import * as Parse5 from 'parse5'; +import * as parser from '@babel/parser'; +import traverse from '@babel/traverse'; import { TEMPLATE_TAG_NAME, TEMPLATE_LITERAL_MODULE_SPECIFIER, TEMPLATE_LITERAL_IDENTIFIER, } from './util'; -import { expect } from './debug'; +import { Identifier } from '@babel/types'; export type TemplateMatch = TemplateTagMatch | TemplateLiteralMatch; +export type Match = { start: number; end: number }; + export interface TemplateTagMatch { type: 'template-tag'; tagName: string; - start: RegExpMatchArray; - end: RegExpMatchArray; + start: Match; + end: Match; contents: string; + prefix?: string; } export interface TemplateLiteralMatch { type: 'template-literal'; tagName: string; contents: string; - start: RegExpMatchArray; - end: RegExpMatchArray; + start: Match; + end: Match; importPath: string; importIdentifier: string; + prefix?: string; } export function isTemplateLiteralMatch( @@ -63,32 +69,13 @@ export interface ParseTemplatesOptions { templateLiteral?: StaticImportConfig[]; } -const escapeChar = '\\'; -const stringDelimiter = /['"]/; - -const singleLineCommentStart = /\/\//; -const newLine = /\n/; -const multiLineCommentStart = /\/\*/; -const multiLineCommentEnd = /\*\//; - -const templateLiteralStart = /([$a-zA-Z_][0-9a-zA-Z_$]*)?`/; -const templateLiteralEnd = /`/; - -const dynamicSegmentStart = /\${/; -const blockStart = /{/; -const dynamicSegmentEnd = /}/; - -function isEscaped(template: string, _offset: number | undefined) { - let offset = expect(_offset, 'Expected an index to check escaping'); - - let count = 0; - - while (template[offset - 1] === escapeChar) { - count++; - offset--; - } - - return count % 2 === 1; +function replaceRange( + s: string, + start: number, + end: number, + substitute: string +) { + return s.substring(0, start) + substitute + s.substring(end); } export const DEFAULT_PARSE_TEMPLATES_OPTIONS = { @@ -149,281 +136,122 @@ export function parseTemplates( const templateTag = options?.templateTag; const templateLiteralConfig = options?.templateLiteral; - const templateTagStart = new RegExp(`<${templateTag}[^<]*>`); - const templateTagEnd = new RegExp(``); - const argumentsMatchRegex = new RegExp(`<${templateTag}[^<]*\\S[^<]*>`); - let importedNames = new Map(); if (templateLiteralConfig) { importedNames = findImportedNames(template, templateLiteralConfig); } - const allTokens = new RegExp( - [ - singleLineCommentStart.source, - newLine.source, - multiLineCommentStart.source, - multiLineCommentEnd.source, - stringDelimiter.source, - templateLiteralStart.source, - templateLiteralEnd.source, - dynamicSegmentStart.source, - dynamicSegmentEnd.source, - blockStart.source, - templateTagStart.source, - templateTagEnd.source, - ].join('|'), - 'g' - ); - - const tokens = Array.from(matchAll(template, allTokens)); - - while (tokens.length > 0) { - const currentToken = tokens.shift()!; // eslint-disable-line @typescript-eslint/no-non-null-assertion - - parseToken(results, template, currentToken, tokens, true); - } - - /** - * Parse the current token. If top level, then template tags can be parsed. - * Else, we are nested within a dynamic segment, which is currently unsupported. - */ - function parseToken( - results: TemplateMatch[], - template: string, - token: RegExpMatchArray, - tokens: RegExpMatchArray[], - isTopLevel = false - ) { - if (token[0].match(multiLineCommentStart)) { - parseMultiLineComment(results, template, token, tokens); - } else if (token[0].match(singleLineCommentStart)) { - parseSingleLineComment(results, template, token, tokens); - } else if (token[0].match(templateLiteralStart)) { - parseTemplateLiteral( - results, - template, - token, - tokens, - isTopLevel, - importedNames - ); - } else if ( - isTopLevel && - templateTag !== undefined && - templateTagStart && - token[0].match(templateTagStart) - ) { - parseTemplateTag(results, template, token, tokens, templateTag); - } else if (token[0].match(stringDelimiter)) { - parseString(results, template, token, tokens); - } - } - - /** - * Parse a string. All tokens within a string are ignored - * since there are no dynamic segments within these. - */ - function parseString( - _results: TemplateMatch[], - template: string, - startToken: RegExpMatchArray, - tokens: RegExpMatchArray[] - ) { - while (tokens.length > 0) { - const currentToken = expect(tokens.shift(), 'expected token'); - - if ( - currentToken[0] === startToken[0] && - !isEscaped(template, currentToken.index) - ) { - return; - } - } - } - - /** - * Parse a single-line comment. All tokens within a single-line comment are ignored - * since there are no dynamic segments within them. - */ - function parseSingleLineComment( - _results: TemplateMatch[], - _template: string, - _startToken: RegExpMatchArray, - tokens: RegExpMatchArray[] - ) { - while (tokens.length > 0) { - const currentToken = expect(tokens.shift(), 'expected token'); - - if (currentToken[0] === '\n') { - return; - } - } - } - - /** - * Parse a multi-line comment. All tokens within a multi-line comment are ignored - * since there are no dynamic segments within them. - */ - function parseMultiLineComment( - _results: TemplateMatch[], - _template: string, - _startToken: RegExpMatchArray, - tokens: RegExpMatchArray[] - ) { - while (tokens.length > 0) { - const currentToken = expect(tokens.shift(), 'expected token'); - - if (currentToken[0] === '*/') { - return; - } - } - } - - /** - * Parse a template literal. If a dynamic segment is found, enters the dynamic - * segment and parses it recursively. If no dynamic segments are found and the - * literal is top level (e.g. not nested within a dynamic segment) and has a - * tag, pushes it into the list of results. - */ - function parseTemplateLiteral( - results: TemplateMatch[], - template: string, - startToken: RegExpMatchArray, - tokens: RegExpMatchArray[], - isTopLevel = false, - importedNames: Map - ) { - let hasDynamicSegment = false; - - while (tokens.length > 0) { - let currentToken = expect(tokens.shift(), 'expected token'); - - if (isEscaped(template, currentToken.index)) continue; - - if (currentToken[0].match(dynamicSegmentStart)) { - hasDynamicSegment = true; - - parseDynamicSegment(results, template, currentToken, tokens); - } else if (currentToken[0].match(templateLiteralEnd)) { - if (isTopLevel && !hasDynamicSegment && startToken[1]?.length > 0) { - // Handle the case where a tag-like was matched, e.g. hbs`hello` - if (currentToken[0].length > 1) { - const tokenStr = currentToken[0]; - const index = expect(currentToken.index, 'expected index'); - - currentToken = ['`']; - currentToken.index = index + tokenStr.length - 1; + if (templateTag) { + const stack: Parse5.Token.TagToken[] = []; + const htmlParser = new Parse5.Tokenizer( + { sourceCodeLocationInfo: true }, + { + onCharacter(token: Parse5.Token.CharacterToken): void { + // + }, + onComment(token: Parse5.Token.CommentToken): void { + // + }, + onDoctype(token: Parse5.Token.DoctypeToken): void { + // + }, + onEof(token: Parse5.Token.EOFToken): void { + // + }, + onNullCharacter(token: Parse5.Token.CharacterToken): void { + // + }, + onWhitespaceCharacter(token: Parse5.Token.CharacterToken): void { + // + }, + onStartTag(token: Parse5.Token.TagToken) { + if (token.tagName === templateTag) { + stack.push(token); } - const tagName = startToken[1]; - const importConfig = importedNames.get(tagName); - - if (importConfig !== undefined) { - let contents = ''; - - if (startToken.index !== undefined) { - const templateStart = startToken.index + startToken[0].length; - - contents = template.slice(templateStart, currentToken.index); + }, + onEndTag(token: Parse5.Token.TagToken) { + if (token.tagName === templateTag) { + if (stack.length === 1) { + const start = stack[0].location!; + const end = token.location!; + results.push({ + type: 'template-tag', + tagName: templateTag, + contents: template.slice(start.startOffset, end.endOffset), + start: { start: start.startOffset, end: start.endOffset }, + end: { start: end.startOffset, end: end.endOffset }, + }); } - - results.push({ - type: 'template-literal', - tagName, - contents: contents, - start: startToken, - end: currentToken, - importPath: importConfig.importPath, - importIdentifier: importConfig.importIdentifier, - }); + stack.pop(); } - } - - return; - } - } - } - - /** - * Parses a dynamic segment within a template literal. Continues parsing until - * the dynamic segment has been exited, ignoring all tokens within it. - * Accounts for nested block statements, strings, and template literals. - */ - function parseDynamicSegment( - results: TemplateMatch[], - template: string, - _startToken: RegExpMatchArray, - tokens: RegExpMatchArray[] - ) { - let stack = 1; - - while (tokens.length > 0) { - const currentToken = expect(tokens.shift(), 'expected token'); - - if (currentToken[0].match(blockStart)) { - stack++; - } else if (currentToken[0].match(dynamicSegmentEnd)) { - stack--; - } else { - parseToken(results, template, currentToken, tokens); + }, } - - if (stack === 0) { - return; - } - } + ); + htmlParser.write(template, true); } - /** - * Parses a template tag. Continues parsing until the template tag has closed, - * accounting for nested template tags. - */ - function parseTemplateTag( - results: TemplateMatch[], - _template: string, - startToken: RegExpMatchArray, - tokens: RegExpMatchArray[], - templateTag: string - ) { - let stack = 1; - - if (argumentsMatchRegex && startToken[0].match(argumentsMatchRegex)) { - throw new Error( - `embedded template preprocessing currently does not support passing arguments, found args in: ${relativePath}` + let jsCode = template; + results.forEach((r) => { + const length = r.end.end - r.start.start - 4; + jsCode = replaceRange( + jsCode, + r.start.start, + r.end.end, + `['${' '.repeat(length)}']` + ); + }); + + const ast = parser.parse(jsCode, { + ranges: true, + allowImportExportEverywhere: true, + plugins: ['typescript', 'decorators'], + }); + + const validTemplates = new Set(); + + traverse(ast, { + StringLiteral(path) { + const node = path.node; + const t = results.find( + (t) => + t.type === 'template-tag' && + t.start.start === node.start! - 1 && + t.end.end === node.end! + 1 ); - } - - while (tokens.length > 0) { - const currentToken = expect(tokens.shift(), 'expected token'); - - if (currentToken[0].match(templateTagStart)) { - stack++; - } else if (currentToken[0].match(templateTagEnd)) { - stack--; - } - - if (stack === 0) { - let contents = ''; - - if (startToken.index !== undefined) { - const templateStart = startToken.index + startToken[0].length; - - contents = template.slice(templateStart, currentToken.index); + if (t) { + if (path.parent.type === 'MemberExpression') { + t.prefix = ';'; } - + validTemplates.add(t); + } + }, + TaggedTemplateExpression(path) { + const node = path.node; + const tagName = (node.tag as Identifier).name; + const importConfig = importedNames.get(tagName); + if (importConfig) { results.push({ - type: 'template-tag', - tagName: templateTag, - contents: contents, - start: startToken, - end: currentToken, + type: 'template-literal', + tagName, + contents: template.slice( + node.quasi.quasis[0].start!, + node.quasi.quasis.slice(-1)[0].end! + ), + start: { start: node.tag.range![0], end: node.tag.range![1] + 1 }, + end: { start: node.range![1] - 1, end: node.range![1] }, + importPath: importConfig.importPath, + importIdentifier: importConfig.importIdentifier, }); - - return; } - } - } + }, + }); + results.slice().forEach((t) => { + if (t.type === 'template-tag' && !validTemplates.has(t)) { + const i = results.indexOf(t); + results.splice(i, 1); + } + }); return results; } diff --git a/src/preprocess-embedded-templates.ts b/src/preprocess-embedded-templates.ts index c00974d..18dd6f4 100644 --- a/src/preprocess-embedded-templates.ts +++ b/src/preprocess-embedded-templates.ts @@ -2,6 +2,7 @@ import MagicString from 'magic-string'; import path from 'path'; import lineColumn from 'line-column'; import { expect } from './debug'; + import { parseTemplates, ParseTemplatesOptions, @@ -114,8 +115,8 @@ function replaceMatch( getTemplateLocals: GetTemplateLocals, includeTemplateTokens: boolean ): Replacement[] { - const { start: openStart, end: openEnd } = getMatchStartAndEnd(match.start); - const { start: closeStart, end: closeEnd } = getMatchStartAndEnd(match.end); + const { start: openStart, end: openEnd } = match.start; + const { start: closeStart, end: closeEnd } = match.end; let options = ''; @@ -129,7 +130,7 @@ function replaceMatch( } } - const newStart = `${startReplacement}\``; + const newStart = `${match.prefix || ''}${startReplacement}\``; const newEnd = `\`, { strictMode: true${options} }${endReplacement}`; s.overwrite(openStart, openEnd, newStart); diff --git a/yarn.lock b/yarn.lock index bf1fd1f..32d83c5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2011,6 +2011,13 @@ resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0" integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA== +"@types/parse5@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/@types/parse5/-/parse5-7.0.0.tgz#8b412a0a4461c84d6280a372bfa8c57a418a06bd" + integrity sha512-f2SeAxumolBmhuR62vNGTsSAvdz/Oj0k682xNrcKJ4dmRnTPODB74j6CPoNPzBPTHsu7Y7W7u93Mgp8Ovo8vWw== + dependencies: + parse5 "*" + "@types/prettier@^2.1.5": version "2.7.1" resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.7.1.tgz#dfd20e2dc35f027cdd6c1908e80a5ddc7499670e" @@ -5962,6 +5969,11 @@ entities@^2.0.0: resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== +entities@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + entities@~3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/entities/-/entities-3.0.1.tgz#2b887ca62585e96db3903482d336c1006c3001d4" @@ -10882,6 +10894,13 @@ parse5-htmlparser2-tree-adapter@^6.0.0: dependencies: parse5 "^6.0.1" +parse5@*, parse5@^7.1.2: + version "7.1.2" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32" + integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw== + dependencies: + entities "^4.4.0" + parse5@6.0.1, parse5@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"