// Package lexmachine is a full lexical analysis framework for the Go
// programming language. It supports a restricted but usable set of regular
// expressions appropriate for writing lexers for complex programming
// languages. The framework also supports sub-lexers and non-regular lexing
// through an "escape hatch" which allows the user to consume any number of
// further bytes after a match. So if you want to support nested C-style
// comments or other paired structures, you can do so at the lexical
// analysis stage.
//
// For a tutorial see
// http://hackthology.com/writing-a-lexer-in-go-with-lexmachine.html
//
// Example of defining a lexer
//
//     // CreateLexer defines a lexer for the graphviz dot language.
//     func CreateLexer() (*lexmachine.Lexer, error) {
//         lexer := lexmachine.NewLexer()
//
//         for _, lit := range Literals {
//             r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
//             lexer.Add([]byte(r), token(lit))
//         }
//         for _, name := range Keywords {
//             lexer.Add([]byte(strings.ToLower(name)), token(name))
//         }
//
//         lexer.Add([]byte(`//[^\n]*\n?`), token("COMMENT"))
//         lexer.Add([]byte(`/\*([^*]|\r|\n|(\*+([^*/]|\r|\n)))*\*+/`), token("COMMENT"))
//         lexer.Add([]byte(`([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*`), token("ID"))
//         lexer.Add([]byte(`"([^\\"]|(\\.))*"`), token("ID"))
//         lexer.Add([]byte("( |\t|\n|\r)+"), skip)
//         // The HTML-style literals of dot (<...>) nest, which regular
//         // expressions cannot express. This action uses the "escape hatch":
//         // after the `<` matches, it consumes further bytes directly,
//         // tracking bracket depth, then hands control back to the scanner.
//         lexer.Add([]byte(`\<`),
//             func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
//                 str := make([]byte, 0, 10)
//                 str = append(str, match.Bytes...)
//                 brackets := 1
//                 match.EndLine = match.StartLine
//                 match.EndColumn = match.StartColumn
//                 for tc := scan.TC; tc < len(scan.Text); tc++ {
//                     str = append(str, scan.Text[tc])
//                     match.EndColumn += 1
//                     if scan.Text[tc] == '\n' {
//                         match.EndLine += 1
//                     }
//                     if scan.Text[tc] == '<' {
//                         brackets += 1
//                     } else if scan.Text[tc] == '>' {
//                         brackets -= 1
//                     }
//                     if brackets == 0 {
//                         match.TC = scan.TC
//                         scan.TC = tc + 1 // resume regular lexing after the literal
//                         match.Bytes = str
//                         return token("ID")(scan, match)
//                     }
//                 }
//                 return nil,
//                     fmt.Errorf("unclosed HTML literal starting at %d, (%d, %d)",
//                         match.TC, match.StartLine, match.StartColumn)
//             },
//         )
//
//         err := lexer.Compile()
//         if err != nil {
//             return nil, err
//         }
//         return lexer, nil
//     }
//
//     func token(name string) lexmachine.Action {
//         return func(s *lexmachine.Scanner, m *machines.Match) (interface{}, error) {
//             return s.Token(TokenIds[name], string(m.Bytes), m), nil
//         }
//     }
//
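// The skip action above drops its match by returning a nil token. Literals,
// Keywords, Tokens, and TokenIds stand in for the dot language's token
// tables; the sketch below (adapted from the project README) shows their
// intended shapes:
//
//     func skip(*lexmachine.Scanner, *machines.Match) (interface{}, error) {
//         return nil, nil
//     }
//
//     var Literals []string       // the tokens representing literal strings
//     var Keywords []string       // the keyword tokens
//     var Tokens []string         // all of the tokens (including literals and keywords)
//     var TokenIds map[string]int // a map from the token names to their int ids
//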
// Example of using a lexer
//
//     func ExampleLex() error {
//         lexer, err := CreateLexer()
//         if err != nil {
//             return err
//         }
//         scanner, err := lexer.Scanner([]byte(`digraph {
//             rankdir=LR;
//             a [label="a" shape=box];
//             c [<label>=<<u>C</u>>];
//             b [label="bb"];
//             a -> c;
//             c -> b;
//             d -> c;
//             b -> a;
//             b -> e;
//             e -> f;
//         }`))
//         if err != nil {
//             return err
//         }
// fmt.Println("Type | Lexeme | Position")
// fmt.Println("--------+------------+------------")
//         for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
//             if err != nil {
//                 return err
//             }
//             token := tok.(*lexmachine.Token)
//             fmt.Printf("%-7v | %-10v | %v:%v-%v:%v\n",
//                 Tokens[token.Type],
//                 string(token.Lexeme),
//                 token.StartLine,
//                 token.StartColumn,
//                 token.EndLine,
//                 token.EndColumn)
//         }
//         return nil
//     }
//
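// Example of recovering from a lexing error
//
// When no pattern matches at the current position, Next returns a
// *machines.UnconsumedInput error. A sketch of skipping the unmatched text
// and resuming the scan, following the error-handling pattern shown in the
// project README:
//
//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
//         if ui, is := err.(*machines.UnconsumedInput); is {
//             scanner.TC = ui.FailTC // jump past the text no pattern matched
//             continue
//         } else if err != nil {
//             return err
//         }
//         fmt.Println(tok)
//     }
//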
package lexmachine