diff --git a/Cargo.toml b/Cargo.toml
index 6e0a9ef..a0bf70a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "json-tools"
-version = "0.0.1"
+version = "0.1.0"
 authors = ["Sebastian Thiel "]
 license = "MIT"
 description = "A collections of tools to handle json encoded data"
diff --git a/src/lib.rs b/src/lib.rs
index a93251b..8077e71 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,84 @@
-#[test]
-fn it_works() {
+/// A lexer for utf-8 encoded json data, driven by an iterator of `char`s
+pub struct Lexer<I>
+    where I: Iterator<Item = char> {
+    chars: I,
+    cursor: u64,
 }
+
+#[derive(Debug, PartialEq)]
+pub enum TokenType {
+    /// All whitespace
+    WhiteSpace,
+
+    /// `{`
+    CurlyOpen,
+    /// `}`
+    CurlyClose,
+
+    /// `[`
+    BracketOpen,
+    /// `]`
+    BracketClose,
+
+    /// `:`
+    Colon,
+    /// `,`
+    Comma,
+
+
+    /// A json string, like `"foo"`
+    StringValue,
+    /// `true`
+    BooleanTrue,
+    /// `false`
+    BooleanFalse,
+    /// Any json number, like `1.24123` or `123`
+    Number,
+    /// `null`
+    NullValue,
+
+    /// The type of the token could not be identified.
+    /// Should be removed if this lexer is ever to be feature complete
+    Invalid,
+}
+
+/// A pair of indices into the character stream returned by our source
+/// iterator.
+/// It is an exclusive range: `first` is included, `end` is not.
+#[derive(Debug, PartialEq)]
+pub struct Span {
+    /// Index of the first character
+    pub first: u64,
+    /// Index one past the last character
+    pub end: u64,
+}
+
+/// A lexical token, identifying its kind and span.
+#[derive(Debug, PartialEq)]
+pub struct Token {
+    /// The exact type of the token
+    pub kind: TokenType,
+    /// The span allows to reference back into the source character stream
+    /// to obtain the string making up the token.
+    pub span: Span,
+}
+
+impl<I> Lexer<I> where I: Iterator<Item = char> {
+    /// Returns a new Lexer from a given character iterator, with its cursor at 0.
+    pub fn new(chars: I) -> Lexer<I> {
+        Lexer {
+            chars: chars,
+            cursor: 0,
+        }
+    }
+}
+
+impl<I> Iterator for Lexer<I>
+    where I: Iterator<Item = char> {
+    type Item = Token;
+
+    /// Lex the underlying character stream to generate tokens (stub: always `None` for now)
+    fn next(&mut self) -> Option<Token> {
+        None
+    }
+}
\ No newline at end of file
diff --git a/tests/lexer.rs b/tests/lexer.rs
new file mode 100644
index 0000000..f736c4d
--- /dev/null
+++ b/tests/lexer.rs
@@ -0,0 +1,25 @@
+extern crate json_tools;
+
+use json_tools::{Lexer, Token, Span, TokenType};
+
+#[test]
+fn unicode() {
+    let src = r#"{ "face": "😂" }"#;
+    let mut it = Lexer::new(src.chars());
+
+    assert_eq!(it.next(), Some(Token { kind: TokenType::CurlyOpen,
+                                       span: Span { first: 0,
+                                                    end: 1 } }));
+}
+
+
+#[test]
+fn string_escaping() {
+    // Add code here
+}
+
+
+#[test]
+fn multi_line_strings() {
+    // Add code here
+}
\ No newline at end of file