// lexical-analysis.js
// Demo lexer: splits a small sample program into typed tokens and logs each match.
/**
 * Token categories used by the lexer.
 *
 * Each value is the string stored in a token's `type` field and printed in
 * the match log. Frozen so that the shared enum cannot be altered by accident.
 */
const TYPES = Object.freeze({
  KEYWORD: 'keyword',
  IDENTIFIER: 'identifier',
  COMMENT: 'comment',
  STRING: 'string',
  OPERATOR: 'operator',
  PUNCTUATION: 'punctuation',
  INTEGER: 'integer',
});
/**
 * Fixed text fragments used when building log lines.
 *
 * `IS_MATCHED` is the suffix appended to every "token matched" message.
 * Frozen so that the shared constant cannot be altered by accident.
 */
const MESSAGES = Object.freeze({
  IS_MATCHED: 'is matched',
});
/** Sample program that is lexed when `analysis` is called without an argument. */
const SAMPLE_PROGRAM = `# this is a comment
println("Hello World!");
println(2 + 3 * 4);
# functions are introduced with lambda or λ
fib = lambda (n) if n < 2 then n else fib(n - 1) + fib(n - 2);
println(fib(15));
print-range = λ(a, b) # λ is synonym to lambda
if a <= b then { # then here is optional as you can see below
print(a);
if a + 1 <= b {
print(", ");
print-range(a + 1, b);
} else println(""); # newline
};
print-range(1, 5);
`;

/**
 * Splits a program into tokens, one line at a time, logging every match.
 *
 * Recognised lexemes, in priority order:
 *   - `#` starts a comment that runs to the end of the line (logged, not returned)
 *   - a run of digits               -> TYPES.INTEGER (value kept as a string)
 *   - `"..."` on a single line      -> TYPES.STRING (value excludes the quotes)
 *   - a word starting with a letter, `λ` or `_` -> TYPES.KEYWORD or TYPES.IDENTIFIER
 *   - a run of operator characters  -> TYPES.OPERATOR (so `<=` is one token)
 *   - one of `,;(){}[]`             -> TYPES.PUNCTUATION
 * Whitespace and any other character are skipped.
 *
 * @param {string} [source=SAMPLE_PROGRAM] Program text; lines are separated by `\n`.
 * @returns {{type: string, value: string}[]} Tokens in source order.
 * @throws {Error} If a string literal is not closed on the line where it starts.
 */
function analysis(source = SAMPLE_PROGRAM) {
  const KEYWORDS = new Set(['lambda', 'println', 'print', 'print-range', 'if', 'then', 'else', 'λ']);

  // Every predicate below is only ever called with a real one-character string:
  // RegExp#test would coerce `undefined` to "undefined" and wrongly match letters.
  const is_id_start = (ch) => /[a-zA-Zλ_]/.test(ch);
  const is_id = (ch) => is_id_start(ch) || /[!?\-<>=0-9]/.test(ch);
  const is_digit = (ch) => /[0-9]/.test(ch);
  const is_op = (ch) => '+-*/%=&|<>!'.includes(ch);
  const is_punc = (ch) => ',;(){}[]'.includes(ch);
  const is_whitespace = (ch) => ' \t\n'.includes(ch);

  // Writes one "matched" line; `text` is omitted from the message when empty.
  const log_match = (row, column, type, text) => {
    const slot = text ? `, ${text}, ` : ' ';
    console.log(`row:${row},column:${column}, a ${type}${slot}${MESSAGES.IS_MATCHED}`);
  };

  // Returns the index just past the longest run of accepted characters,
  // never reading beyond the end of the line.
  const scan_while = (line, start, accepts) => {
    let end = start;
    while (end < line.length && accepts(line[end])) {
      end++;
    }
    return end;
  };

  const tokens = [];

  for (const [index, line] of source.split('\n').entries()) {
    const row = index + 1;
    let pos = 0;

    // Logs and records a token that starts at `pos`, then moves the cursor to `end`.
    const emit = (type, value, end) => {
      log_match(row, pos + 1, type, value);
      tokens.push({ type, value });
      pos = end;
    };

    while (pos < line.length) {
      const ch = line[pos];

      if (is_whitespace(ch)) {
        pos++;
      } else if (ch === '#') {
        // The rest of the line is a comment: log it and move on to the next line.
        log_match(row, pos + 1, TYPES.COMMENT, '');
        break;
      } else if (is_digit(ch)) {
        const end = scan_while(line, pos, is_digit);
        emit(TYPES.INTEGER, line.slice(pos, end), end);
      } else if (ch === '"') {
        const close = line.indexOf('"', pos + 1);
        if (close === -1) {
          throw new Error(`row:${row},column:${pos + 1}, unterminated string literal`);
        }
        // The value is passed explicitly so that an empty literal "" stays ''.
        emit(TYPES.STRING, line.slice(pos + 1, close), close + 1);
      } else if (is_id_start(ch)) {
        const end = scan_while(line, pos, is_id);
        const word = line.slice(pos, end);
        emit(KEYWORDS.has(word) ? TYPES.KEYWORD : TYPES.IDENTIFIER, word, end);
      } else if (is_op(ch)) {
        const end = scan_while(line, pos, is_op);
        emit(TYPES.OPERATOR, line.slice(pos, end), end);
      } else if (is_punc(ch)) {
        emit(TYPES.PUNCTUATION, ch, pos + 1);
      } else {
        // Unknown character: skip it.
        pos++;
      }
    }
  }

  return tokens;
}
// Run the lexer once when the file is loaded. The returned token list is
// discarded here, so the only visible effect is the console log output.
analysis()