-
Notifications
You must be signed in to change notification settings - Fork 48
/
grammar.co
362 lines (294 loc) · 12.2 KB
/
grammar.co
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# The Coco parser is generated by [Jison](http://github.com/zaach/jison)
# from this grammar file. Jison is a bottom-up parser generator, similar in
# style to [Bison](http://www.gnu.org/software/bison),
# implemented in JavaScript.
# It can recognize
# [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
# type grammars. To create the Jison parser, we list the pattern to match
# on the left-hand side, and the action to take (usually the creation of syntax
# tree nodes) on the right. As the parser runs, it
# shifts tokens from our token stream, from left to right, and
# [attempts to match](http://en.wikipedia.org/wiki/Bottom-up_parsing)
# the token sequence against the rules below. When a match can be made, it
# reduces into the
# [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
# (the enclosing name at the top), and we proceed from there.
#
# If you run the `coke build:parser` command, Jison constructs a parse table
# from our rules and saves it into [lib/parser.js](../lib/parser.js).
# Jison DSL
# ---------
# Our handy DSL for Jison grammar generation, thanks to
# [Tim Caswell](http://github.com/creationix). For every rule in the grammar,
# we pass the pattern-defining string, the action to run, and extra options,
# optionally. If no action is specified, we simply pass the value of the
# previous nonterminal.
ditto = {}
last = ''
o = (patterns, action, options) ->
patterns.=trim!split /\s+/
action &&= if action is ditto then last else
"#action"
.replace /^function\s*\(\)\s*\{\s*([\s\S]*\S)\s*\}/ '$1'
.replace /\breturn\b/ '$$$$ ='
.replace /\b(?!Er)[A-Z][\w.]*/g \yy.$&
.replace /\.L\(/g '$&yylineno, '
[patterns, last := action or '', options]
# Grammatical Rules
# -----------------
# In all of the rules that follow, you'll see the name of the nonterminal as
# the key to a list of alternative matches. With each match's action, the
# dollar-sign variables are provided by Jison as references to the value of
# their numeric position, so in this rule:
#
# Expression MATH Expression
#
# `$1` would be the value of the first _Expression_, `$2` would be the token
# value for the _MATH_ terminal, and `$3` would be the value of the second
# _Expression_.
bnf =
# Things that can directly be accessed or called.
Head:
o \ID -> L Var $1
o \STRNUM -> L Literal $1
o \LITERAL ditto
o \Parenthetical
o \List
# The function literal can be either anonymous with `->`,
o 'PARAM( ArgList )PARAM -> Block' -> L Fun $2, $5, $4 is \~>
# or named with `function`.
o 'FUNCTION CALL( ArgList )CALL Block' -> L Fun($3, $5)named $1
o 'IF Expression Block Else' -> If $2, $3, $1 is \unless .addElse $4
o 'IF INDENT IfLines DEDENT' -> $3.maybeUnlessBlock $1 is \unless
o 'LoopHead Block Else' -> $1.addBody $2 .addElse $3
o 'SWITCH Expression Cases' -> new Switch $2, $3
o 'SWITCH Expression Cases DEFAULT Block' -> new Switch $2, $3, $5
o 'SWITCH Cases' -> new Switch null $2
o 'SWITCH Cases DEFAULT Block' -> new Switch null $2, $4
o 'SWITCH Block' -> new Switch null [], $2
o 'TRY Block' -> new Try $2
o 'TRY Block CATCH Block' -> new Try $2, $3, $4
o 'TRY Block CATCH Block FINALLY Block' -> new Try $2, $3, $4, $6
o 'TRY Block FINALLY Block' -> new Try $2, null null $4
o 'CLASS Chain OptExtends OptImplements Block'
, -> new Class $2.unwrap!, $3, $4, $5
o 'CLASS OptExtends OptImplements Block'
, -> new Class null , $2, $3, $4
o 'LET CALL( ArgList )CALL Block' -> Call.let $3, $5
o 'WITH Expression Block' ->
Pipe $2, $3, \with
o 'FOR Expression Block' ->
new For(kind: $1, source: $2, ref: true)addBody $3
Chain:
o \Head -> Chain $1
o 'Chain DOT Key' -> $1.add Index $3, $2, true
o 'Chain DOT List' ditto
o 'Chain CALL( ArgList )CALL' -> $1.add Call $3
o 'Chain ?' -> Chain Existence $1.unwrap!
# An array or object
List:
o '[ ArgList ]' -> L Arr $2
o '{ PropList }' -> L Obj $2
# can be labeled to perform named destructuring.
o '[ ArgList ] LABEL' -> L Arr $2 .named $4
o '{ PropList } LABEL' -> L Obj $2 .named $4
# **Key** represents a property name, before `:` or after `.`.
Key:
o \KeyBase
o \Parenthetical
KeyBase:
o \ID -> L Key $1
o \STRNUM -> L Literal $1
# **ArgList** is either the list of objects passed into a function call,
# the parameter list of a function, or the contents of an array literal
# (i.e. comma-separated expressions). Newlines work as well.
ArgList:
o '' -> []
o 'Args OptComma'
Args:
o \Arg -> [$1]
o 'Args , Arg' -> $1.push $3; $1
o 'Args OptComma NEWLINE Arg' -> $1.push $4; $1
o 'INDENT Args OptComma DEDENT' -> $2
o 'Args , INDENT Args OptComma DEDENT' -> $1.concat $4
Arg:
o \Expression
o '... Expression' -> Splat $2
o \... -> Splat L(Arr!), true
# An optional, trailing comma.
OptComma:
o ''
o \,
# A list of lines, separated by newlines or semicolons.
Lines:
o '' -> L Block!
o \Line -> Block $1
o 'Lines NEWLINE Line' -> $1.add $3
o 'Lines NEWLINE'
Line:
o \Expression
# Cascade: an implicit `with`.
o 'Expression Block' -> Pipe $1, $2, \cascade
o 'PARAM( ArgList )PARAM <- Expression'
, -> Call.back $2, $5, $4 is \<~
o \COMMENT -> L JS $1, true true
# [yadayadayada](http://search.cpan.org/~tmtm/Yada-Yada-Yada-1.00/Yada.pm)
o \... -> L Throw JS "Error('unimplemented')"
# An indented block of expressions.
# Note that [Lexer](#lexer) rewrites some single-line forms into blocks.
Block:
o 'INDENT Lines DEDENT' -> $2
...
# All the different types of expressions in our language.
Expression:
o \Chain -> $1.unwrap!
o 'Chain ASSIGN Expression'
, -> Assign $1.unwrap!, $3 , $2
o 'Chain ASSIGN INDENT ArgList DEDENT'
, -> Assign $1.unwrap!, Arr.maybe($4), $2
o 'Expression IMPORT Expression'
, -> Import $1, $3 , $2 is \<<<<
o 'Expression IMPORT INDENT ArgList DEDENT'
, -> Import $1, Arr.maybe($4), $2 is \<<<<
o 'CREMENT Chain' -> Unary $1, $2.unwrap!
o 'Chain CREMENT' -> Unary $2, $1.unwrap!, true
o 'UNARY ASSIGN Chain' -> Assign $3.unwrap!, [$1] $2
o '+- ASSIGN Chain' ditto
o '^ ASSIGN Chain' ditto
o 'UNARY Expression' -> Unary $1, $2
o '+- Expression' ditto, prec: \UNARY
o '^ Expression' ditto, prec: \UNARY
o 'UNARY INDENT ArgList DEDENT' -> Unary $1, Arr.maybe $3
o 'Expression +- Expression' -> Binary $2, $1, $3
o 'Expression ^ Expression' ditto
o 'Expression COMPARE Expression' ditto
o 'Expression LOGIC Expression' ditto
o 'Expression MATH Expression' ditto
o 'Expression SHIFT Expression' ditto
o 'Expression BITWISE Expression' ditto
o 'Expression RELATION Expression' ->
*if \! is $2.charAt 0 then Binary $2.slice(1), $1, $3 .invert!
else Binary $2 , $1, $3
o 'Expression |> Expression' -> Pipe $1, $3
o 'Expression |> Block' ditto
o 'Chain !?' -> Existence $1.unwrap!, true
o 'Expression POST_IF Expression' -> If $3, $1, $2 is \unless
o 'Expression LoopHead' -> $2.addBody Block $1
o 'DO Block WHILE Expression'
, -> new While($4, $3 is \until, true)addBody $2
# `return` or `throw`.
o 'HURL Expression' -> Jump[$1] $2
o 'HURL INDENT ArgList DEDENT' -> Jump[$1] Arr.maybe $3
o \HURL -> L Jump[$1]!
# `break` or `continue`.
o \JUMP -> L new Jump $1
o 'JUMP ID' -> L new Jump $1, $2
o 'Chain EXTENDS Expression' -> Util.Extends $1.unwrap!, $3
o 'LABEL Expression' -> new Label $1, $2
o 'LABEL Block' ditto
# `var`, `const`, `export`, or `import`
o 'DECL INDENT ArgList DEDENT' -> Decl $1, $3, yylineno+1
Exprs:
o \Expression -> [$1]
o 'Exprs , Expression' -> $1.push $3; $1
# The various forms of property.
KeyValue:
o \Key
o \LITERAL -> Prop L(Key $1, $1 not of <[ arguments eval ]>), L Literal $1
o 'Key DOT KeyBase' -> Prop $3, Chain( $1; [Index $3, $2])
o 'LITERAL DOT KeyBase' -> Prop $3, Chain(L Literal $1; [Index $3, $2])
o '{ PropList } LABEL' -> Prop L(Key $4), L(Obj $2 .named $4)
o '[ ArgList ] LABEL' -> Prop L(Key $4), L(Arr $2 .named $4)
Prop:
o 'Key : Expression' -> Prop $1, $3
o 'Key : INDENT ArgList DEDENT' -> Prop $1, Arr.maybe($4)
o \KeyValue
o 'KeyValue LOGIC Expression' -> Binary $2, $1, $3
o '+- Key' -> Prop $2.maybeKey! , L Literal $1 is \+
o '+- LITERAL' -> Prop L(Key $2, true), L Literal $1 is \+
o '... Expression' -> Splat $2
o \COMMENT -> L JS $1, true true
# Properties within an object literal can be separated by
# commas, as in JavaScript, or simply by newlines.
PropList:
o '' -> []
o 'Props OptComma'
Props:
o \Prop -> [$1]
o 'Props , Prop' -> $1.push $3; $1
o 'Props OptComma NEWLINE Prop' -> $1.push $4; $1
o 'INDENT Props OptComma DEDENT' -> $2
Parenthetical:
o '( Body )' -> Parens $2.chomp!unwrap!, false, $1 is \"
...
Body:
o \Lines
o \Block
o 'Block NEWLINE Lines' -> $1.add $3
Else:
o '' -> null
o 'ELSE Block' -> $2
o 'ELSE IF Expression Block Else' -> If $3, $4, $2 is \unless .addElse $5
LoopHead:
# The source of a `for`-loop is an array, object, or range.
# Unless it's iterating over an object, you can choose to step through
# in fixed-size increments.
o 'FOR Chain OF Expression'
, -> new For kind: $1, item: $2.unwrap!, index: $3, source: $4
o 'FOR Chain OF Expression BY Expression'
, -> new For kind: $1, item: $2.unwrap!, index: $3, source: $4, step: $6
o 'FOR ID IN Expression'
, -> new For kind: $1, index: $2, source: $4, in: true
o 'FOR ID , Chain IN Expression'
, -> new For kind: $1, index: $2, item: $4.unwrap!, source: $6, in: true
o 'FOR ID FROM Expression TO Expression'
, -> new For kind: $1, index: $2, from: $4, op: $5, to: $6
o 'FOR ID FROM Expression TO Expression BY Expression'
, -> new For kind: $1, index: $2, from: $4, op: $5, to: $6, step: $8
o 'WHILE Expression' -> new While $2, $1 is \until
o 'WHILE Expression , Expression' -> new While $2, $1 is \until, $4
IfLines:
o 'Expression Block Else' -> If $1, $2 .addElse $3
o 'Expression Block NEWLINE IfLines' -> If $1, $2 .addElse $4
Cases:
o 'CASE Exprs Block' -> [new Case $2, $3]
o 'Cases CASE Exprs Block' -> $1.push new Case $3, $4; $1
OptExtends:
o 'EXTENDS Expression' -> $2
o '' -> null
OptImplements:
o 'IMPLEMENTS Exprs' -> $2
o '' -> null
# Precedence and Associativity
# ----------------------------
# Following these rules is what makes
# `a + b * c` parse as `a + (b * c)` (rather than `(a + b) * c`),
# and `x = y = z` `x = (y = z)` (not `(x = y) = z`).
operators =
# Listed from lower precedence.
<[ left |> ]>
<[ left POST_IF FOR WHILE ]>
<[ right , ASSIGN HURL EXTENDS INDENT SWITCH CASE TO BY LABEL ]>
<[ right LOGIC ]>
<[ left ^ BITWISE ]>
<[ right COMPARE ]>
<[ left RELATION ]>
<[ left SHIFT IMPORT ]>
<[ left +- ]>
<[ left MATH ]>
<[ right UNARY ]>
<[ nonassoc CREMENT ]>
# Wrapping Up
# -----------
# Process all of our rules and prepend resolutions, while recording all
# terminals (every symbol which does not appear as the name of a rule above)
# as `tokens`.
tokens =
for name, alts in bnf
for alt of alts
token if token not in bnf for token of alt.0
.join ' '
bnf.Root = [[[\Body] 'return $$']]
# Finally, initialize the parser with the name of the root.
module.exports =
new (require \jison)Parser {bnf, operators, tokens, startSymbol: \Root}