Simplified Protocol Buffers Version 3 and gRPC parser using Haskell and Parsec
> stack run -- --help
Usage: protobuf-parser-exe [-f|--file PATH] [-p|--pretty] [STRING...]
Available options:
-f,--file PATH Specify file path to parse
-p,--pretty Enable pretty print
-h,--help Show this help text
stack run -- -p -f ./test/E2E/protofiles/chat.proto
stack run -- -p "message SearchRequest { int32 page_number = 2; double results_per_page = 3; }"
stack run "import \"foo.proto\"; import \"bar.proto\"; package foobar;"
stack test
protobuf-parser
├── app
│   └── Main.hs                 -> CLI Parsing
├── ...
├── src
│   └── Text
│       └── Protobuf
│           ├── Parser          -> Parser Combinators
│           │   ├── ...
│           │   └── *.hs
│           ├── Parser.hs       -> Complete Protobuf Parser
│           └── Types.hs        -> Protobuf Type representation
├── ...
└── test
    ├── E2E
    │   ├── ...
    │   └── protofiles          -> Example Protobuf files
    │       └── *.proto
    ├── ...
    └── Unit
        └── ...
This project serves as a parser combinator showcase. Therefore, not all features are complete or correct:
- Only proto3 syntax is supported
- Not all values are checked for correctness
- Base Lexical Elements do not strictly follow the official spec
- Proto 3 Ranges do not allow the keyword "min"
- Empty statements are missing
- Import weak and public are missing
The correct and complete grammar can be found in the official Protocol Buffers Version 3 Language Specification.
The following is the basic syntax in Extended Backus-Naur Form (EBNF):
| alternation
() grouping
[] option (zero or one time)
{} repetition (any number of times)
syntax = "syntax" "=" ("'" "proto3" "'" | '"' "proto3" '"') ";"
import = "import" [ "weak" | "public" ] strLit ";"
package = "package" fullIdent ";"
constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
strLit | boolLit | MessageValue
option = "option" optionName "=" constant ";"
optionName = ( ident | "(" ["."] fullIdent ")" )
type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
| "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
| "bool" | "string" | "bytes" | messageType | enumType
fieldNumber = intLit
field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
fieldOptions = fieldOption { "," fieldOption }
fieldOption = optionName "=" constant
oneof = "oneof" oneofName "{" { option | oneofField } "}"
oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
"fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
reserved = "reserved" ( ranges | strFieldNames ) ";"
ranges = range { "," range }
range = intLit [ "to" ( intLit | "max" ) ]
strFieldNames = strFieldName { "," strFieldName }
strFieldName = "'" fieldName "'" | '"' fieldName '"'
enum = "enum" enumName enumBody
enumBody = "{" { option | enumField | emptyStatement | reserved } "}"
enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ] ";"
enumValueOption = optionName "=" constant
message = "message" messageName messageBody
messageBody = "{" { field | enum | message | option | oneof | mapField |
reserved | emptyStatement } "}"
service = "service" serviceName "{" { option | rpc | emptyStatement } "}"
rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ]
messageType ")" (( "{" {option | emptyStatement } "}" ) | ";")
proto = syntax { import | package | option | topLevelDef | emptyStatement }
topLevelDef = message | enum | service