Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scan bigger/fewer jsdoc tokens #53081

Merged
merged 19 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 69 additions & 56 deletions src/compiler/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ import {
tracing,
TransformFlags,
trimString,
trimStringEnd,
TryStatement,
TupleTypeNode,
TypeAliasDeclaration,
Expand Down Expand Up @@ -2165,6 +2166,10 @@ namespace Parser {
return currentToken = scanner.scanJsDocToken();
}

function nextJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
return currentToken = scanner.scanJSDocCommentTextToken(inBackticks);
}

function reScanGreaterToken(): SyntaxKind {
return currentToken = scanner.reScanGreaterToken();
}
Expand Down Expand Up @@ -8601,20 +8606,19 @@ namespace Parser {
}
loop: while (true) {
switch (token()) {
case SyntaxKind.JSDocCommentTextToken:
state = JSDocState.SavingComments;
pushComment(scanner.getTokenValue());
break;
case SyntaxKind.AtToken:
if (state === JSDocState.BeginningOfLine || state === JSDocState.SawAsterisk) {
removeTrailingWhitespace(comments);
if (!commentsPos) commentsPos = getNodePos();
addTag(parseTag(indent));
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
// for malformed examples like `/** @param {string} x @returns {number} the length */`
state = JSDocState.BeginningOfLine;
margin = undefined;
}
else {
pushComment(scanner.getTokenText());
}
removeTrailingWhitespace(comments);
if (!commentsPos) commentsPos = getNodePos();
addTag(parseTag(indent));
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
// for malformed examples like `/** @param {string} x @returns {number} the length */`
state = JSDocState.BeginningOfLine;
margin = undefined;
break;
case SyntaxKind.NewLineTrivia:
comments.push(scanner.getTokenText());
Expand All @@ -8623,24 +8627,23 @@ namespace Parser {
break;
case SyntaxKind.AsteriskToken:
const asterisk = scanner.getTokenText();
if (state === JSDocState.SawAsterisk || state === JSDocState.SavingComments) {
if (state === JSDocState.SawAsterisk) {
// If we've already seen an asterisk, then we can no longer parse a tag on this line
state = JSDocState.SavingComments;
pushComment(asterisk);
}
else {
Debug.assert(state === JSDocState.BeginningOfLine);
// Ignore the first asterisk on a line
state = JSDocState.SawAsterisk;
indent += asterisk.length;
}
break;
case SyntaxKind.WhitespaceTrivia:
Debug.assert(state !== JSDocState.SavingComments, "whitespace shouldn't come from the scanner while saving top-level comment text");
// only collect whitespace if we're already saving comments or have just crossed the comment indent margin
const whitespace = scanner.getTokenText();
if (state === JSDocState.SavingComments) {
comments.push(whitespace);
}
else if (margin !== undefined && indent + whitespace.length > margin) {
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
}
indent += whitespace.length;
Expand Down Expand Up @@ -8671,15 +8674,20 @@ namespace Parser {
pushComment(scanner.getTokenText());
break;
}
nextTokenJSDoc();
if (state === JSDocState.SavingComments) {
nextJSDocCommentTextToken(/*inBackticks*/ false);
}
else {
nextTokenJSDoc();
}
}
removeTrailingWhitespace(comments);
if (parts.length && comments.length) {
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentsPos));
const trimmedComments = trimStringEnd(comments.join(""));
if (parts.length && trimmedComments.length) {
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? start, commentsPos));
}
if (parts.length && tags) Debug.assertIsDefined(commentsPos, "having parsed tags implies that the end of the comment span should be set");
const tagsArray = tags && createNodeArray(tags, tagsPos, tagsEnd);
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments.join("") : undefined, tagsArray), start, end);
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : trimmedComments.length ? trimmedComments : undefined, tagsArray), start, end);
});

function removeLeadingNewlines(comments: string[]) {
Expand All @@ -8689,8 +8697,18 @@ namespace Parser {
}

function removeTrailingWhitespace(comments: string[]) {
while (comments.length && comments[comments.length - 1].trim() === "") {
comments.pop();
while (comments.length) {
const trimmed = trimString(comments[comments.length - 1]);
sandersn marked this conversation as resolved.
Show resolved Hide resolved
if (trimmed === "") {
comments.pop();
}
else if (trimmed.length < comments[comments.length - 1].length) {
comments[comments.length - 1] = trimStringEnd(comments[comments.length - 1]);
break;
}
else {
break;
}
}
}

Expand Down Expand Up @@ -8846,7 +8864,6 @@ namespace Parser {
const parts: JSDocComment[] = [];
let linkEnd;
let state = JSDocState.BeginningOfLine;
let previousWhitespace = true;
let margin: number | undefined;
function pushComment(text: string) {
if (!margin) {
Expand All @@ -8862,39 +8879,36 @@ namespace Parser {
}
state = JSDocState.SawAsterisk;
}
let tok = token() as JSDocSyntaxKind;
let tok = token() as JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
loop: while (true) {
switch (tok) {
case SyntaxKind.JSDocCommentTextToken:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a stupid nit, but I was tricked into thinking this token could be returned from token() before any other token because it comes first in the list. Maybe I would have not misunderstood had the case not been added at the top, but, I don't think it actually matters.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better place for it? I put it at the top because I thought it would be the most common and--who knows--maybe switch performance is order dependent. Except that with the improved scanner, it's not really more common than any other token.

Probably down with default is the best place.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I probably would have stuck it at the bottom, but, I don't really mind either way. Just a silly brain thing from me trying to understand the state machine.

if (state !== JSDocState.SavingBackticks) {
state = JSDocState.SavingComments; // leading identifiers start recording as well
}
pushComment(scanner.getTokenValue());
break;
case SyntaxKind.NewLineTrivia:
state = JSDocState.BeginningOfLine;
// don't use pushComment here because we want to keep the margin unchanged
comments.push(scanner.getTokenText());
indent = 0;
break;
case SyntaxKind.AtToken:
if (state === JSDocState.SavingBackticks
|| state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) {
// @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace
comments.push(scanner.getTokenText());
break;
}
scanner.setTextPos(scanner.getTextPos() - 1);
// falls through
break loop;
case SyntaxKind.EndOfFileToken:
// Done
break loop;
case SyntaxKind.WhitespaceTrivia:
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
pushComment(scanner.getTokenText());
}
else {
const whitespace = scanner.getTokenText();
// if the whitespace crosses the margin, take only the whitespace that passes the margin
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
}
indent += whitespace.length;
Debug.assert(state !== JSDocState.SavingComments && state !== JSDocState.SavingBackticks, "whitespace shouldn't come from the scanner while saving comment text");
const whitespace = scanner.getTokenText();
Copy link
Member

@DanielRosenwasser DanielRosenwasser Mar 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This slices whitespace, then you slice out of the whitespace. Swap this to grab the beginning/end of the whitespace, calculate the length, and slice right out of the original sourceText only if necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good idea. I noted it and will try it in a separate PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: #53121

// if the whitespace crosses the margin, take only the whitespace that passes the margin
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
state = JSDocState.SavingComments;
}
indent += whitespace.length;
break;
case SyntaxKind.OpenBraceToken:
state = JSDocState.SavingComments;
Expand Down Expand Up @@ -8936,28 +8950,27 @@ namespace Parser {
pushComment(scanner.getTokenText());
break;
}
previousWhitespace = token() === SyntaxKind.WhitespaceTrivia;
tok = nextTokenJSDoc();
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
tok = nextJSDocCommentTextToken(state === JSDocState.SavingBackticks);
}
else {
tok = nextTokenJSDoc();
}
}

removeLeadingNewlines(comments);
removeTrailingWhitespace(comments);
const trimmedComments = trimStringEnd(comments.join(""));
if (parts.length) {
if (comments.length) {
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos));
if (trimmedComments.length) {
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? commentsPos));
}
return createNodeArray(parts, commentsPos, scanner.getTextPos());
}
else if (comments.length) {
return comments.join("");
else if (trimmedComments.length) {
return trimmedComments;
}
}

function isNextJSDocTokenWhitespace() {
const next = nextTokenJSDoc();
return next === SyntaxKind.WhitespaceTrivia || next === SyntaxKind.NewLineTrivia;
}

function parseJSDocLink(start: number) {
const linkType = tryParse(parseJSDocLinkPrefix);
if (!linkType) {
Expand Down
32 changes: 32 additions & 0 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ export interface Scanner {
reScanInvalidIdentifier(): SyntaxKind;
scanJsxToken(): JsxTokenSyntaxKind;
scanJsDocToken(): JSDocSyntaxKind;
/** @internal */
scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
jakebailey marked this conversation as resolved.
Show resolved Hide resolved
scan(): SyntaxKind;

getText(): string;
Expand Down Expand Up @@ -256,6 +258,7 @@ const textToToken = new Map(Object.entries({
"@": SyntaxKind.AtToken,
"#": SyntaxKind.HashToken,
"`": SyntaxKind.BacktickToken,
"JSDOC_COMMENT_TEXT": SyntaxKind.JSDocCommentTextToken,
sandersn marked this conversation as resolved.
Show resolved Hide resolved
}));

/*
Expand Down Expand Up @@ -1020,6 +1023,7 @@ export function createScanner(languageVersion: ScriptTarget,
reScanInvalidIdentifier,
scanJsxToken,
scanJsDocToken,
scanJSDocCommentTextToken,
scan,
getText,
clearCommentDirectives,
Expand Down Expand Up @@ -2455,6 +2459,34 @@ export function createScanner(languageVersion: ScriptTarget,
return scanJsxAttributeValue();
}

function scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
startPos = tokenPos = pos;
tokenFlags = TokenFlags.None;
if (pos >= end) {
return token = SyntaxKind.EndOfFileToken;
}
for (let ch = codePointAt(text, pos);
pos < end && (ch !== CharacterCodes.lineFeed && ch !== CharacterCodes.carriageReturn && ch !== CharacterCodes.backtick);
sandersn marked this conversation as resolved.
Show resolved Hide resolved
ch = codePointAt(text, pos += charSize(ch))) {
if (!inBackticks) {
if (ch === CharacterCodes.openBrace) {
break;
}
else if (ch === CharacterCodes.at
&& pos - 1 >= 0 && isWhiteSpaceSingleLine(codePointAt(text, pos - 1))
&& !(pos + 1 < end && isWhiteSpaceLike(codePointAt(text, pos + 1)))) {
// @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace
break;
}
}
}
if (pos === tokenPos) {
return scanJsDocToken();
}
tokenValue = text.substring(tokenPos, pos);
return token = SyntaxKind.JSDocCommentTextToken;
}

function scanJsDocToken(): JSDocSyntaxKind {
startPos = tokenPos = pos;
tokenFlags = TokenFlags.None;
Expand Down
5 changes: 5 additions & 0 deletions src/compiler/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ export const enum SyntaxKind {
BacktickToken,
/** Only the JSDoc scanner produces HashToken. The normal scanner produces PrivateIdentifier. */
HashToken,
/**
* Only the special JSDoc comment text scanner produces JSDocCommentTextTokes. One of these tokens spans all text after a tag comment's start and before the next @
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just now noticing the parallel between this comment and the previous ones. Given people can call our scanner themselves, maybe I was wrong and this should be public so that no undocumented kinds are returned from token?

* @internal
*/
JSDocCommentTextToken,
sandersn marked this conversation as resolved.
Show resolved Hide resolved
// Assignments
EqualsToken,
PlusEqualsToken,
Expand Down
1 change: 1 addition & 0 deletions src/testRunner/unittests/jsDocParsing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ oh.no
* Some\n\n * text\r\n * with newlines.
*/`);
parsesCorrectly("Chained tags, no leading whitespace", `/**@a @b @c@d*/`);
parsesCorrectly("Single trailing whitespace", `/** trailing whitespace */`);
parsesCorrectly("Initial star is not a tag", `/***@a*/`);
parsesCorrectly("Initial star space is not a tag", `/*** @a*/`);
parsesCorrectly("Initial email address is not a tag", `/**[email protected]*/`);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,27 @@
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "* @a"
"comment": "*",
"tags": {
"0": {
"kind": "JSDocTag",
"pos": 5,
"end": 7,
"modifierFlagsCache": 0,
"transformFlags": 0,
"tagName": {
"kind": "Identifier",
"pos": 6,
"end": 7,
"modifierFlagsCache": 0,
"transformFlags": 0,
"escapedText": "a"
}
},
"length": 1,
"pos": 5,
"end": 7,
"hasTrailingComma": false,
"transformFlags": 0
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"kind": "JSDoc",
"pos": 0,
"end": 26,
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "trailing whitespace"
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,41 @@
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "* @type {number}"
"comment": "*",
"tags": {
"0": {
"kind": "JSDocTypeTag",
"pos": 6,
"end": 21,
"modifierFlagsCache": 0,
"transformFlags": 0,
"tagName": {
"kind": "Identifier",
"pos": 7,
"end": 11,
"modifierFlagsCache": 0,
"transformFlags": 0,
"escapedText": "type"
},
"typeExpression": {
"kind": "JSDocTypeExpression",
"pos": 12,
"end": 20,
"modifierFlagsCache": 0,
"transformFlags": 0,
"type": {
"kind": "NumberKeyword",
"pos": 13,
"end": 19,
"modifierFlagsCache": 0,
"transformFlags": 1
}
}
},
"length": 1,
"pos": 6,
"end": 21,
"hasTrailingComma": false,
"transformFlags": 0
}
}
Loading