feat(token-reader): machine serialization works
In a first version, we show that serialization without any whitespace
works as expected according to the very first tests.

More tests have to be conducted to be sure.
Byron committed May 7, 2015
1 parent 96dac09 commit 458928d
Showing 4 changed files with 86 additions and 4 deletions.
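
A minimal sketch of the round trip this commit enables, assuming the crate's public `Lexer`, `BufferType::Span`, and `TokenReader` names as they are used in `tests/filters.rs`; the sample JSON and the exact `Lexer::new` call shape are illustrative assumptions, not taken from the diff:

extern crate json_tools;

use std::io::Read;
use json_tools::{Lexer, BufferType, TokenReader};

fn main() {
    let src = r#"{ "key" : [ true, null ] }"#;
    // Span tokens only carry byte ranges, so the reader also needs the source text.
    let lexer = Lexer::new(src.bytes(), BufferType::Span);
    let mut reader = TokenReader::new(lexer, Some(src));

    // Read one byte at a time, exactly like the test below; the reader signals
    // the end of the token stream with an error rather than Ok(0).
    let mut buf = Vec::new();
    let mut byte = [0u8];
    while reader.read(&mut byte).is_ok() {
        buf.push(byte[0]);
    }

    // All whitespace between tokens is gone after serialization.
    assert_eq!(String::from_utf8(buf).unwrap(), r#"{"key":[true,null]}"#);
}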
19 changes: 19 additions & 0 deletions src/lexer.rs
@@ -47,6 +47,25 @@ pub enum TokenType {
    Invalid,
}

impl AsRef<str> for TokenType {
    fn as_ref(&self) -> &str {
        match *self {
            TokenType::CurlyOpen => "{",
            TokenType::CurlyClose => "}",
            TokenType::BracketOpen => "[",
            TokenType::BracketClose => "]",
            TokenType::Colon => ":",
            TokenType::Comma => ",",
            TokenType::BooleanTrue => "true",
            TokenType::BooleanFalse => "false",
            TokenType::Null => "null",

            TokenType::Invalid => panic!("Cannot convert invalid TokenType"),
            _ => panic!("Cannot convert variant TokenTypes"),
        }
    }
}

/// A pair of indices into the byte stream returned by our source
/// iterator.
/// It is an exclusive range.
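For illustration, a couple of assertions showing what the new `AsRef<str>` impl yields; the variant names come from the diff above, everything else is a hypothetical usage sketch:

extern crate json_tools;

use json_tools::TokenType;

fn main() {
    // Structural characters and literals map back to their exact JSON spelling.
    assert_eq!(TokenType::CurlyOpen.as_ref(), "{");
    assert_eq!(TokenType::Colon.as_ref(), ":");
    assert_eq!(TokenType::BooleanTrue.as_ref(), "true");
    // String, Number and Invalid have no fixed spelling and fall into the
    // panicking arms: their text lives in the token's buffer instead.
}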
1 change: 1 addition & 0 deletions src/lib.rs
@@ -1,3 +1,4 @@
#![feature(core, collections)]
mod lexer;
mod key_value_filter;
mod reader;
66 changes: 64 additions & 2 deletions src/reader.rs
@@ -1,23 +1,85 @@
use std::io::{Read, Result, self};
use std::slice::bytes::copy_memory;
use std::cmp;

-use super::{Token, TokenType};
+use super::{Token, TokenType, Buffer};

pub struct TokenReader<'a, I: Iterator<Item=Token>> {
    iter: I,
    src: Option<&'a str>,
    buf: Vec<u8>
}

impl<'a, I: Iterator<Item=Token>> TokenReader<'a, I> {
    pub fn new(iter: I, source: Option<&'a str>) -> TokenReader<'a, I> {
        TokenReader {
            iter: iter,
            src: source,
            buf: Vec::with_capacity(128)
        }
    }
}

impl<'a, I: Iterator<Item=Token>> Read for TokenReader<'a, I> {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
-       Err(io::Error::last_os_error())
        if buf.len() == 0 {
            return Ok(0)
        }

        // Bytes from Cache
        let mut bl = buf.len();
        if self.buf.len() > 0 {
            let btc = cmp::min(self.buf.len(), buf.len());
            let new_buf = self.buf.split_off(btc);
            copy_memory(&self.buf, buf);
            self.buf = new_buf;
            bl -= btc;
        }
        if bl == 0 {
            return Ok(buf.len());
        }

        // Generate bytes from tokens
        while bl > 0 {
            match self.iter.next() {
                None => {
                    // if we have not read any byte yet, we may return an error
                    if bl == buf.len() {
                        return Err(io::Error::new(io::ErrorKind::Other, "End of Token-Iterator"))
                    } else {
                        return Ok(buf.len() - bl)
                    }
                },
                Some(t) => {
                    let bytes: &[u8] =
                        match t.kind {
                            TokenType::String
                            | TokenType::Number => {
                                match t.buf {
                                    Buffer::MultiByte(ref b) => &b,
                                    Buffer::Span(ref s) => match self.src {
                                        Some(b) => b[s.first as usize .. s.end as usize].as_bytes(),
                                        None => panic!("Must set source if tokens don't provide byte buffers"),
                                    }
                                }
                            },
                            _ => t.kind.as_ref().as_bytes(),
                        };
                    let btc = cmp::min(bytes.len(), bl);
                    copy_memory(&bytes[..btc], buf);
                    bl -= btc;

                    if btc < bytes.len() {
                        debug_assert!(bl == 0);
                        self.buf.push_all(&bytes[btc..])
                    }

                    if bl == 0 {
                        return Ok(buf.len())
                    }
                }
            } // match iter.next()
        } // end while there are bytes to produce
        unreachable!();
    }
}
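
Two behavioural details of `read` worth spelling out: leftover bytes of a token that did not fit into the caller's buffer are parked in the reader's internal `buf` and served first on the next call, and an exhausted token iterator is reported as an `io::Error` of kind `Other` rather than the conventional `Ok(0)`. A hedged sketch of the consumption pattern this implies (the `drain` helper is hypothetical, not part of the crate):

use std::io::Read;

// Hypothetical helper: pull bytes out of any reader one at a time,
// treating the first error as end of stream, as the tests below do.
fn drain<R: Read>(mut reader: R) -> Vec<u8> {
    let mut out = Vec::new();
    let mut byte = [0u8];
    loop {
        // TokenReader never returns Ok(0) for a non-empty destination buffer;
        // exhaustion of the token iterator surfaces as Err("End of Token-Iterator").
        if reader.read(&mut byte).is_err() {
            break;
        }
        out.push(byte[0]);
    }
    out
}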
4 changes: 2 additions & 2 deletions tests/filters.rs
@@ -1,7 +1,6 @@
extern crate json_tools;

-use std::io::Read;
+use std::io::{Read, self, Write};
use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType, TokenReader};

#[test]
@@ -39,6 +38,7 @@ fn filter_null_values() {
            break
        }
        buf.push(byte[0]);
        // writeln!(io::stderr(), "{:?}", buf).ok();
    }
    assert_eq!(&String::from_utf8(buf).unwrap(), want);
}
