-
Notifications
You must be signed in to change notification settings - Fork 0
/
lex.ts
108 lines (99 loc) · 3.54 KB
/
lex.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Lex //
////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Human-readable position of a token in the source text.
 * `lex` produces values of the form `line <line>:<column>` for real tokens
 * and the literal `end of script` for the trailing end marker.
 */
type TokenLocation = string
/** A single lexical unit produced by `lex`. */
type Token = {
/** The exact source text that was matched (empty for the end marker). */
text: string,
/**
 * The name of the lexer rule that matched (e.g. `integer`, `name`, `string`),
 * `special` for reserved words and punctuation, or `end` for the end marker.
 */
type: string,
/** Where the token starts in the source, for use in messages. */
location: TokenLocation,
}
/**
 * Renders a token for use in diagnostics: the token text wrapped in
 * backticks, followed by ` at ` and the token's location.
 *
 * @param token The token to describe.
 * @returns A string such as `` `foo` at line 1:2 ``.
 */
function showToken(token: Token): string {
  const quotedText = "`" + token.text + "`";
  return quotedText + " at " + token.location;
}
/**
 * Value describing a failure to lex or parse.
 * It is a plain class (it does not extend `Error`); `lex` returns an instance
 * rather than throwing one.
 */
class ParseError {
  /** Description of what went wrong. */
  public readonly message: string;

  /**
   * @param message Description of what went wrong.
   */
  constructor(message: string) {
    this.message = message;
  }
}
/**
 * Splits `source` into tokens.
 *
 * At each position every rule is tried against the remaining text and the
 * longest match wins; on a tie the rule declared first wins. Tokens whose
 * text is a reserved word are reclassified as `special`. Whitespace and
 * `//` comments are recognised but not included in the result. The returned
 * list always ends with an `end` token whose text is empty and whose
 * location is `end of script`.
 *
 * String literals are delimited by double quotes. Inside a literal a
 * backslash escapes the character that follows it, so `\"` does not end the
 * literal and `\\` is a literal backslash.
 *
 * @param source The full text to tokenize.
 * @returns The token list, or a `ParseError` if some position matches no
 *          rule (the message reports the zero-based character offset).
 */
function lex(source: string): Token[] | ParseError {
  // Precompute a printable location for every character offset so tokens
  // can be stamped with their starting position in O(1).
  const locationByCharacter: string[] = [];
  {
    let line = 1;
    let column = 1;
    for (let i = 0; i < source.length; i++) {
      locationByCharacter[i] = `line ${line}:${column}`;
      const ch = source[i];
      if (ch === "\r") {
        column = 1;
      } else if (ch === "\n") {
        column = 1;
        line++;
      } else if (ch === "\t") {
        // A tab advances the reported column by four.
        column += 4;
      } else {
        column++;
      }
    }
    locationByCharacter.push("end of file");
  }

  // Every rule is anchored with `^` and is applied to the unread remainder.
  const rules: {[n: string]: RegExp} = {
    integer: /^[0-9]+/,
    name: /^[a-zA-Z_][a-zA-Z_0-9]*/,
    whitespace: /^\s+/,
    comment: /^\/\/[^\n]*/,
    // Either an ordinary character (not a quote or backslash) or a backslash
    // followed by any character. The two alternatives cannot overlap, so an
    // escaped quote can never be mistaken for the closing quote.
    string: /^"(?:[^"\\]|\\[\s\S])*"/,
    special: /^[()\]\[.,;:&#|{}!]/,
    operator: /^[+\-*/=<>?%^~]+/,
    foreign: /^foreign#[^#]*#/,
  };
  const keywords = ["func", "foreign", "mut", "discard", "true", "false", "self", "never", "interface", "instance", "struct", "enum", "switch", "of", "case", "yield", "is", "and", "or", "if", "while", "var", "for", "else", "service", "effect", "return", "break", "continue"];

  const result: Token[] = [];
  let start = 0;
  while (start < source.length) {
    // Take the remainder once per token rather than once per rule.
    const rest = source.slice(start);
    let next: null | Token = null;
    for (const tokenType in rules) {
      const match = rest.match(rules[tokenType]);
      if (!match) {
        continue;
      }
      // Strict `<` keeps the earliest-declared rule when lengths are equal.
      if (!next || next.text.length < match[0].length) {
        next = {text: match[0], type: tokenType, location: locationByCharacter[start]};
      }
    }
    if (!next) {
      return new ParseError(`unknown token at character ${start}`);
    }
    start += next.text.length;
    if (keywords.indexOf(next.text) >= 0) {
      next.type = "special";
    }
    // Whitespace and comments only separate tokens; they are not emitted.
    if (next.type !== "comment" && next.type !== "whitespace") {
      result.push(next);
    }
  }
  result.push({text: "", type: "end", location: "end of script"});
  return result;
}
/**
 * A cursor over a token array. Advancing yields a new `TokenStream` that
 * shares the same array; the receiver is left unchanged.
 */
class TokenStream {
  private tokens: Token[];
  private position: number;

  /**
   * @param tokens   The token array to read from.
   * @param position Index of the current token; defaults to 0.
   */
  constructor(tokens: Token[], position: number = 0) {
    this.tokens = tokens;
    this.position = position;
  }

  /** Returns the token at the current position. */
  head(): Token {
    const { tokens, position } = this;
    return tokens[position];
  }

  /**
   * Returns a stream positioned one token further along. The position is
   * capped at the last index of the array, so repeated calls stay on the
   * final token.
   */
  tail(): TokenStream {
    const lastIndex = this.tokens.length - 1;
    const nextIndex = this.position + 1;
    return new TokenStream(this.tokens, Math.min(lastIndex, nextIndex));
  }
}
/**
 * Pattern used to test a token. A selector beginning with `$` names a token
 * type (the text after the `$`); any other selector is compared against the
 * token's text.
 */
type TokenSelector = string;
/**
 * Tests whether `token` is matched by `selector`.
 *
 * @param selector Either `$` followed by a token type, or the literal token text.
 * @param token    The token to test.
 * @returns `true` when the selector starts with `$` and the rest equals the
 *          token's type, or when it does not start with `$` and equals the
 *          token's text.
 */
function selectsToken(selector: string, token: Token): boolean {
  const selectsByType = selector.startsWith("$");
  return selectsByType
    ? token.type === selector.slice(1)
    : token.text === selector;
}
export {
Token,
TokenStream,
lex,
showToken,
ParseError,
TokenSelector,
selectsToken,
}