This repository has been archived by the owner on Nov 26, 2017. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.py
76 lines (63 loc) · 1.36 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Infasm - An Inform 6 assembler for the Z-Machine
# Copyright (c) 2009, Dannii Willis
# Released under a BSD-like licence, see LICENCE
from ply import lex
import warnings
# Inform 6 directives recognised by the lexer, keyed by the lowercase keyword;
# each maps to a token type that is simply the keyword upper-cased
directives = dict(
    (keyword, keyword.upper()) for keyword in ('array', 'constant', 'global')
)

# Assembly tokens: the fixed token types followed by one type per directive
tokens = ['COMMENT', 'ID', 'LABEL', 'NUMBER', 'OPCODE']
tokens.extend(directives.values())

# Single-character literal tokens (each character of the string is one literal)
literals = ';->?~[]='

# Token specifications
# A comment is a '!' plus the rest of that line; the t_ignore_ prefix makes
# PLY discard the match instead of emitting a token
t_ignore_COMMENT = r'!.*'
# Lowercase every identifier, then retag it with the directive's own token
# type when the lowercased name is a known directive (plain 'ID' otherwise)
def t_ID(t):
    r'[a-zA-Z][a-zA-Z_0-9]*'
    # The raw string above is the PLY token regex and must stay first.
    name = t.value.lower()
    t.value = name
    if name in directives:
        t.type = directives[name]
    else:
        t.type = 'ID'
    return t
# Labels are written ".name;" - keep only the name between the leading '.'
# and the trailing ';'
def t_LABEL(t):
    r'\.[a-zA-Z][a-zA-Z0-9_]*;'
    # The raw string above is the PLY token regex and must stay first.
    lexeme = t.value
    t.value = lexeme[1:len(lexeme) - 1]
    return t
# Allow decimal and hexadecimal number literals
def t_NUMBER(t):
    r'(\$|-)?[0-9A-Fa-f]+'
    # The raw string above is the PLY token regex and must stay first.
    #
    # Replaces t.value with the integer the lexeme denotes and returns the
    # token: a leading '$' selects base 16 (the '$' is dropped before
    # conversion); anything else, including a leading '-', is read as base 10.
    text = t.value
    if text.startswith('$'):
        digits, base = text[1:], 16
    else:
        digits, base = text, 10
    try:
        t.value = int(digits, base)
    except ValueError:
        # int() never fails on magnitude; the only way to get here is a
        # lexeme the regex admits but base 10 cannot parse, i.e. hex digits
        # without the '$' prefix such as "12ab" or "-ff". Report it as such
        # and carry on with 0 so lexing can continue.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Invalid number literal '%s'" % text)
        t.value = 0
    return t
# Opcodes are written "@name"; the token value is the name without the '@'
def t_OPCODE(t):
    r'@[a-zA-Z][a-zA-Z0-9_]*'
    # The raw string above is the PLY token regex and must stay first.
    mnemonic = t.value[1:]
    t.value = mnemonic
    return t
# Non-tokens
# Spaces and tabs are skipped between tokens; t_ignore is the special name
# PLY reads for the set of characters to discard
t_ignore = " \t"
def t_newline(t):
    r'\n+'
    # The raw string above is the PLY token regex and must stay first.
    # Advance the lexer's line counter by the number of newlines consumed;
    # nothing is returned, so the match produces no token.
    consumed = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + consumed
# Error handling rule
def t_error(t):
    # PLY calls this when no rule matches at the current position. t.value
    # is the not-yet-tokenized remainder of the input, so its first character
    # is the one that could not be matched.
    # Single-argument print(...) behaves the same on Python 2 and 3, whereas
    # the bare print statement is a syntax error on Python 3.
    print("Illegal character '%s'" % t.value[0])
    # Step over the offending character so lexing resumes right after it.
    t.lexer.skip(1)
# Build the lexer
# lex.lex() is called with no arguments, so PLY gathers `tokens`, `literals`
# and the t_* rules from this module; this runs once, when the module is
# first imported
lexer = lex.lex()