Skip to content
This repository has been archived by the owner on Jun 15, 2021. It is now read-only.

Added parser #2

Merged
merged 1 commit into from
Mar 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"editor.formatOnPaste": true,
"files.exclude": {
"**/node_modules": true
}
},
"cSpell.words": ["bfnrt"]
}
12 changes: 4 additions & 8 deletions docs/grammar.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This grammar is based on the published by
[actions/workflow-parser](https://github.com/actions/workflow-parser/blob/master/language.md). Compiling a `.workflow`
file is divided into three phases:

1. **Scanning**: where text is divided into invididual tokens, marked by a position, length and a type. Missing
1. **Scanning**: where text is divided into individual tokens, marked by a range and a type.
2. **Parsing**: an initial syntax tree is constructed from the token stream. Parsing is error-tolerant and prefers to
construct partially valid trees in order to report diagnostics in the next phase.
3. **Binding**: where a complete list of symbols is compiled, and any advanced analysis/error reporting is done.
Expand All @@ -13,8 +13,7 @@ A compilation holds the results of these operations. The rest of this document d

## Scanning

The scanner produces a list of the tokens below from a document text. Each token has a start position, a length, and a
type. The valid whitespace characters are ' ', '\n', '\r', and '\t'.
The scanner produces a list of the tokens below from a document text.

```g4
VERSION_KEYWORD : 'version' ;
Expand Down Expand Up @@ -43,12 +42,9 @@ LINE_COMMENT : ('#' | '//') ~[\r\n]* ;

INTEGER_LITERAL : [0-9]+ ;
STRING_LITERAL : '"' (('\\' ["\\/bfnrt]) | ~["\\\u0000-\u001F\u007F])* '"' ;
```

Two additional token types are created by the compiler:

- **UNRECOGNIZED_TOKEN** when a character not supported by the grammar is encountered.
- **MISSING_TOKEN** when a token was expected at a certain location, but was not found. These tokens have zero length.
WS : [\n \t\r] -> skip;
```

## Parsing

Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
*/

export { Compilation } from "./util/compilation";
export { Token, TextPosition, TextRange, TokenKind } from "./scanning/tokens";
export { TextPosition, TextRange } from "./scanning/tokens";
export { DiagnosticCode, Diagnostic } from "./util/diagnostics";
247 changes: 247 additions & 0 deletions src/parsing/parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
/*!
* Copyright 2019 Omar Tawfik. Please see LICENSE file at the root of this repository.
*/

import { DiagnosticBag } from "../util/diagnostics";
import { Token, TokenKind, getTokenDescription } from "../scanning/tokens";
import {
DocumentSyntax,
VersionSyntax,
BlockSyntax,
KeyValuePairSyntax,
StringValueSyntax,
StringArrayValueSyntax,
EnvVariablesValueSyntax,
StringArrayMemberSyntax,
EnvVariableMemberSyntax,
} from "./syntax-nodes";

/**
 * A link in the chain of active parse scopes. Each nested parse routine
 * creates a child context pointing at its parent; `eat` walks this chain
 * during error recovery to decide whether an unexpected token should be
 * bubbled up to an enclosing construct (by returning a missing token) or
 * skipped locally with a diagnostic.
 */
interface ParseContext {
  // Enclosing context; undefined at the top level of the document.
  readonly parent?: ParseContext;
  // Token kinds this scope can recover on (e.g. a closing bracket, or the
  // set of keywords that may legally start the next sibling construct).
  readonly supported: ReadonlyArray<TokenKind>;
}

/**
 * Parses a scanned token stream into a `DocumentSyntax` tree.
 *
 * Parsing is error-tolerant: an unexpected token is either skipped (with a
 * diagnostic added to `bag`) or replaced by a synthetic "missing" token so a
 * partially valid tree can still be produced for later analysis phases. At
 * most one diagnostic is reported per token position (see `reportedErrors`).
 *
 * @param allTokens - All tokens produced by the scanner, in document order.
 * @param bag - Collector that receives all diagnostics found while parsing.
 * @returns The root syntax node for the document.
 */
export function parseTokens(allTokens: ReadonlyArray<Token>, bag: DiagnosticBag): DocumentSyntax {
  // Comments carry no grammar meaning, and unrecognized characters were
  // already diagnosed by the scanner, so both are dropped before parsing.
  const tokens = allTokens.filter(token => token.kind !== TokenKind.Comment && token.kind !== TokenKind.Unrecognized);
  // reportedErrors[i] is set once a diagnostic has been reported at
  // tokens[i], so recovery never emits duplicate errors for one position.
  const reportedErrors = Array<boolean>();

  // Cursor into `tokens`, shared by every nested parse routine below.
  let index = 0;
  const versions: VersionSyntax[] = [];
  const blocks: BlockSyntax[] = [];

  // Each iteration makes progress: `eat` either consumes the current token
  // or skips it (index += 1), and a Missing result only occurs at end of
  // input here (top-level context supports no recovery kinds), so the loop
  // terminates.
  while (index < tokens.length) {
    parseTopLevelNode({
      supported: [],
    });
  }

  // NOTE(review): `blocks` is populated by parseBlock but never handed to
  // DocumentSyntax — confirm against ./syntax-nodes whether the constructor
  // should also receive the workflow/action blocks, or whether discarding
  // them here is intentional at this stage.
  return new DocumentSyntax(versions);

  // Parses one top-level construct: a `version` statement or a
  // `workflow`/`action` block. Skips stray tokens until one of the three
  // keywords (or end of input) is found.
  function parseTopLevelNode(context: ParseContext): void {
    const keywordKinds = [TokenKind.VersionKeyword, TokenKind.WorkflowKeyword, TokenKind.ActionKeyword];
    const keyword = eat(context, ...keywordKinds);

    // Children may recover on any top-level keyword: seeing one means the
    // current construct ended and a new sibling is starting.
    const innerContext = {
      parent: context,
      supported: keywordKinds,
    };

    switch (keyword.kind) {
      case TokenKind.VersionKeyword: {
        parseVersion(keyword, innerContext);
        break;
      }
      case TokenKind.WorkflowKeyword:
      case TokenKind.ActionKeyword: {
        parseBlock(keyword, innerContext);
        break;
      }
      case TokenKind.Missing: {
        // move to the next token
        break;
      }
      default: {
        throw new Error(`Unexpected token '${getTokenDescription(keyword.kind)}' here.`);
      }
    }
  }

  // Parses `version = <integer>`; `version` keyword was already consumed.
  function parseVersion(version: Token, context: ParseContext): void {
    const equal = eat(context, TokenKind.Equal);
    const integer = eat(context, TokenKind.IntegerLiteral);

    versions.push(new VersionSyntax(version, equal, integer));
  }

  // Parses `workflow|action "<name>" { <key-value pairs> }`; the leading
  // keyword (`type`) was already consumed by the caller.
  function parseBlock(type: Token, context: ParseContext): void {
    const name = eat(context, TokenKind.StringLiteral);
    const openBracket = eat(context, TokenKind.LeftCurlyBracket);

    const members = parseKeyValuePairs({
      parent: context,
      // The closing curly ends the member list, so it is a recovery point.
      supported: [TokenKind.RightCurlyBracket],
    });

    const closeBracket = eat(context, TokenKind.RightCurlyBracket);
    blocks.push(new BlockSyntax(type, name, openBracket, members, closeBracket));
  }

  // Parses zero or more `key = value` pairs until the block's closing curly
  // (or until no valid property keyword is found).
  function parseKeyValuePairs(context: ParseContext): ReadonlyArray<KeyValuePairSyntax> {
    const members: KeyValuePairSyntax[] = [];

    while (!isNext(TokenKind.RightCurlyBracket)) {
      // The full set of property keywords valid inside a block.
      const keyKinds = [
        TokenKind.OnKeyword,
        TokenKind.ResolvesKeyword,
        TokenKind.UsesKeyword,
        TokenKind.NeedsKeyword,
        TokenKind.RunsKeyword,
        TokenKind.ArgsKeyword,
        TokenKind.EnvKeyword,
        TokenKind.SecretsKeyword,
      ];

      const key = eat(context, ...keyKinds);
      if (key.kind === TokenKind.Missing) {
        // Stop looking for key-value pairs
        break;
      }

      members.push(
        parseKeyValuePair(key, {
          parent: context,
          // A new property keyword means the current pair ended; recover on it.
          supported: keyKinds,
        }),
      );
    }

    return members;
  }

  // Parses `= <value>` after a property key. The value is a string literal,
  // a string array `[...]`, or an env-variable map `{...}`; it is left
  // undefined when no valid value token is found.
  function parseKeyValuePair(key: Token, context: ParseContext): KeyValuePairSyntax {
    const equal = eat(context, TokenKind.Equal);
    const valueStart = eat(context, TokenKind.StringLiteral, TokenKind.LeftCurlyBracket, TokenKind.LeftSquareBracket);

    let value: StringValueSyntax | StringArrayValueSyntax | EnvVariablesValueSyntax | undefined;
    switch (valueStart.kind) {
      case TokenKind.StringLiteral: {
        value = new StringValueSyntax(valueStart);
        break;
      }
      case TokenKind.LeftSquareBracket: {
        value = parseStringArray(valueStart, context);
        break;
      }
      case TokenKind.LeftCurlyBracket: {
        value = parseEnvVariables(valueStart, context);
        break;
      }
      case TokenKind.Missing: {
        // leave value undefined
        break;
      }
      default: {
        throw new Error(`Unexpected token '${getTokenDescription(valueStart.kind)}' here.`);
      }
    }

    return new KeyValuePairSyntax(key, equal, value);
  }

  // Parses `"a", "b", ...]` after the opening `[` (already consumed). Note
  // that the comma is only consumed when present, so members separated by
  // whitespace alone also parse without a diagnostic here.
  function parseStringArray(openBracket: Token, context: ParseContext): StringArrayValueSyntax {
    const values: StringArrayMemberSyntax[] = [];

    while (!isNext(TokenKind.RightSquareBracket)) {
      const value = eat(context, TokenKind.StringLiteral);

      if (value.kind === TokenKind.Missing) {
        break;
      }

      let comma: Token | undefined;
      if (isNext(TokenKind.Comma)) {
        comma = eat(context, TokenKind.Comma);
      }

      values.push(new StringArrayMemberSyntax(value, comma));
    }

    const closeBracket = eat(context, TokenKind.RightSquareBracket);
    return new StringArrayValueSyntax(openBracket, values, closeBracket);
  }

  // Parses `NAME = "value" ... }` after the opening `{` (already consumed).
  function parseEnvVariables(openBracket: Token, context: ParseContext): EnvVariablesValueSyntax {
    const variables: EnvVariableMemberSyntax[] = [];

    while (!isNext(TokenKind.RightCurlyBracket)) {
      const name = eat(context, TokenKind.Identifier);

      if (name.kind === TokenKind.Missing) {
        break;
      }

      const equal = eat(context, TokenKind.Equal);
      const value = eat(context, TokenKind.StringLiteral);

      variables.push(new EnvVariableMemberSyntax(name, equal, value));
    }

    const closeBracket = eat(context, TokenKind.RightCurlyBracket);
    return new EnvVariablesValueSyntax(openBracket, variables, closeBracket);
  }

  // Peeks at the current token without consuming it.
  function isNext(kind: TokenKind): boolean {
    return index < tokens.length && tokens[index].kind === kind;
  }

  // Core consume-with-recovery primitive. Returns the current token when it
  // matches one of `expected`. Otherwise: if any context in the chain lists
  // the current token as a recovery point, a missing token is returned so
  // the ancestor can handle it; if not, the token is skipped (diagnosed at
  // most once per position) and the search continues.
  function eat(context: ParseContext, ...expected: TokenKind[]): Token {
    while (true) {
      if (index >= tokens.length) {
        return missingToken(expected);
      }

      const current = tokens[index];
      if (expected.includes(current.kind)) {
        index += 1;
        return current;
      }

      // Walk up the context chain: can an enclosing construct make sense of
      // this token (e.g. its closing bracket, or a sibling's keyword)?
      let canBeHandledByParent = false;
      let currentContext: ParseContext | undefined = context;
      while (!canBeHandledByParent && currentContext) {
        canBeHandledByParent = currentContext.supported.includes(current.kind);
        currentContext = currentContext.parent;
      }

      if (canBeHandledByParent) {
        // Leave the token unconsumed for the ancestor; report the expected
        // one as missing here.
        return missingToken(expected);
      }

      if (!reportedErrors[index]) {
        bag.unexpectedToken(current);
        reportedErrors[index] = true;
      }

      index += 1;
    }
  }

  // Fabricates a token of kind Missing, reporting a missing-token diagnostic
  // at most once per position. The synthetic token borrows the range of the
  // current token (or of the last token at end of input) rather than having
  // its own zero-width range. Safe to index `tokens[missingIndex]`: this is
  // only reachable from the parse loop, which requires at least one token.
  function missingToken(expected: TokenKind[]): Token {
    let missingIndex = index;
    const endOfFile = index >= tokens.length;
    if (endOfFile) {
      missingIndex = tokens.length - 1;
    }

    const range = tokens[missingIndex].range;
    if (!reportedErrors[missingIndex]) {
      bag.missingToken(expected, range, endOfFile);
      reportedErrors[missingIndex] = true;
    }

    return {
      range,
      kind: TokenKind.Missing,
    };
  }
}
Loading