diff --git a/benches/usage.rs b/benches/usage.rs
index 0bb5183..023b21a 100644
--- a/benches/usage.rs
+++ b/benches/usage.rs
@@ -3,7 +3,7 @@
 extern crate json_tools;
 extern crate test;
 
-use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType, TokenReader, Token, Buffer, Span};
+use json_tools::{Buffer, BufferType, FilterTypedKeyValuePairs, Lexer, Span, Token, TokenReader, TokenType};
 use std::io;
 
 const NULL_RIDDEN: &'static str = r##"
@@ -124,33 +124,30 @@ struct KeyValueProducer {
 impl KeyValueProducer {
     fn new(bt: BufferType) -> KeyValueProducer {
         KeyValueProducer {
-            buf: [Token {
-                      kind: TokenType::String,
-                      buf: match bt {
-                          BufferType::Bytes(_) => Buffer::MultiByte(KEY_VALUE_SRC[0..5].into()),
-                          BufferType::Span => Buffer::Span(Span { first: 0, end: 5 }),
-                      },
-                  },
-                  Token {
-                      kind: TokenType::Colon,
-                      buf: Buffer::Span(Span::default()),
-                  },
-                  Token {
-                      kind: TokenType::String,
-                      buf: match bt {
-                          BufferType::Bytes(_) => Buffer::MultiByte(KEY_VALUE_SRC[6..25].into()),
-                          BufferType::Span => {
-                              Buffer::Span(Span {
-                                  first: 6,
-                                  end: 25,
-                              })
-                          }
-                      },
-                  },
-                  Token {
-                      kind: TokenType::Comma,
-                      buf: Buffer::Span(Span::default()),
-                  }],
+            buf: [
+                Token {
+                    kind: TokenType::String,
+                    buf: match bt {
+                        BufferType::Bytes(_) => Buffer::MultiByte(KEY_VALUE_SRC[0..5].into()),
+                        BufferType::Span => Buffer::Span(Span { first: 0, end: 5 }),
+                    },
+                },
+                Token {
+                    kind: TokenType::Colon,
+                    buf: Buffer::Span(Span::default()),
+                },
+                Token {
+                    kind: TokenType::String,
+                    buf: match bt {
+                        BufferType::Bytes(_) => Buffer::MultiByte(KEY_VALUE_SRC[6..25].into()),
+                        BufferType::Span => Buffer::Span(Span { first: 6, end: 25 }),
+                    },
+                },
+                Token {
+                    kind: TokenType::Comma,
+                    buf: Buffer::Span(Span::default()),
+                },
+            ],
             cur: 0,
         }
     }
@@ -184,8 +181,10 @@ fn span_lexer_throughput(b: &mut test::Bencher) {
 #[bench]
 fn span_lexer_span_token_reader_throughput(b: &mut test::Bencher) {
     b.iter(|| {
-        let mut r = TokenReader::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
-                                     Some(NULL_RIDDEN));
+        let mut r = TokenReader::new(
+            Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
+            Some(NULL_RIDDEN),
+        );
         io::copy(&mut r, &mut io::sink()).ok();
     });
     b.bytes = NULL_RIDDEN.len() as u64;
@@ -194,8 +193,10 @@ fn span_lexer_span_token_reader_throughput(b: &mut test::Bencher) {
 #[bench]
 fn span_lexer_bytes_token_reader_throughput(b: &mut test::Bencher) {
     b.iter(|| {
-        let mut r = TokenReader::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Bytes(128)),
-                                     None);
+        let mut r = TokenReader::new(
+            Lexer::new(NULL_RIDDEN.bytes(), BufferType::Bytes(128)),
+            None,
+        );
         io::copy(&mut r, &mut io::sink()).ok();
     });
     b.bytes = NULL_RIDDEN.len() as u64;
@@ -205,8 +206,10 @@ fn span_lexer_bytes_token_reader_throughput(b: &mut test::Bencher) {
 fn bytes_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
     let mut ncb = 0u64;
     b.iter(|| {
-        let mut r = TokenReader::new(KeyValueProducer::new(BufferType::Bytes(0)).take(30000),
-                                     None);
+        let mut r = TokenReader::new(
+            KeyValueProducer::new(BufferType::Bytes(0)).take(30000),
+            None,
+        );
         ncb = io::copy(&mut r, &mut io::sink()).unwrap();
     });
     b.bytes = ncb;
@@ -216,8 +219,10 @@ fn bytes_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
 fn span_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
     let mut ncb = 0u64;
     b.iter(|| {
-        let mut r = TokenReader::new(KeyValueProducer::new(BufferType::Span).take(30000),
-                                     Some(KEY_VALUE_SRC));
+        let mut r = TokenReader::new(
+            KeyValueProducer::new(BufferType::Span).take(30000),
+            Some(KEY_VALUE_SRC),
+        );
         ncb = io::copy(&mut r, &mut io::sink()).unwrap();
     });
     b.bytes = ncb;
@@ -234,12 +239,13 @@ fn bytes_lexer_throughput(b: &mut test::Bencher) {
     b.bytes = NULL_RIDDEN.len() as u64;
 }
 
-
 #[bench]
 fn span_filter_null_throughput(b: &mut test::Bencher) {
     b.iter(|| {
-        let f = FilterTypedKeyValuePairs::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
-                                              TokenType::Null);
+        let f = FilterTypedKeyValuePairs::new(
+            Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
+            TokenType::Null,
+        );
         for t in f {
             test::black_box(t);
         }
@@ -247,14 +253,17 @@ fn span_filter_null_throughput(b: &mut test::Bencher) {
     b.bytes = NULL_RIDDEN.len() as u64;
 }
 
-
 #[bench]
 fn span_lexer_throughput_with_cursor(b: &mut test::Bencher) {
     use std::io::{Cursor, Read};
 
     b.iter(|| {
-        let it = Lexer::new(Cursor::new(NULL_RIDDEN.as_bytes()).bytes().filter_map(|r| r.ok()),
-                            BufferType::Span);
+        let it = Lexer::new(
+            Cursor::new(NULL_RIDDEN.as_bytes())
+                .bytes()
+                .filter_map(|r| r.ok()),
+            BufferType::Span,
+        );
         for t in it {
             test::black_box(t);
         }
diff --git a/src/iter_ext.rs b/src/iter_ext.rs
index 31ed71e..c824d2a 100644
--- a/src/iter_ext.rs
+++ b/src/iter_ext.rs
@@ -1,5 +1,4 @@
-use super::{Token, FilterTypedKeyValuePairs, TokenType, TokenReader};
-
+use super::{FilterTypedKeyValuePairs, Token, TokenReader, TokenType};
 
 /// Applies convenience constructors to all `Iterator<Item = Token>` types
 pub trait IteratorExt: Iterator<Item = Token> {
@@ -8,7 +7,8 @@ pub trait IteratorExt: Iterator<Item = Token> {
     ///
     /// It is useful, for example, to get rid of `null` values on a lexical level.
     fn filter_key_value_by_type(self, token_type: TokenType) -> FilterTypedKeyValuePairs<Self>
-        where Self: Sized
+    where
+        Self: Sized,
     {
         FilterTypedKeyValuePairs::new(self, token_type)
     }
@@ -21,7 +21,8 @@ pub trait IteratorExt: Iterator<Item = Token> {
     /// serializing tokens, as they can refer to their original
     /// `&str` slice.
     fn reader<'a>(self, source: Option<&'a str>) -> TokenReader<'a, Self>
-        where Self: Sized
+    where
+        Self: Sized,
    {
         TokenReader::new(self, source)
     }
diff --git a/src/key_value_filter.rs b/src/key_value_filter.rs
index 3064d87..94edf17 100644
--- a/src/key_value_filter.rs
+++ b/src/key_value_filter.rs
@@ -40,7 +40,8 @@ impl<I: IntoIterator<Item = Token>> FilterTypedKeyValuePairs<I> {
 }
 
 impl<I> Iterator for FilterTypedKeyValuePairs<I>
-    where I: IntoIterator<Item = Token>
+where
+    I: IntoIterator<Item = Token>,
 {
     type Item = Token;
 
@@ -136,9 +137,9 @@ impl<I> Iterator for FilterTypedKeyValuePairs<I>
                         }
                     }
                     _ => return first_token(&mut self.buf, first_str_candidate),
-                }// end match token kind (string?)
-            }// end inner str candidate LOOP
-        }// end have token
+                } // end match token kind (string?)
+            } // end inner str candidate LOOP
+        } // end have token
             None => None,
         }
     }
diff --git a/src/lexer.rs b/src/lexer.rs
index e3c971f..84d3560 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -112,7 +112,8 @@ pub enum BufferType {
 }
 
 impl<I> Lexer<I>
-    where I: IntoIterator<Item = u8>
+where
+    I: IntoIterator<Item = u8>,
 {
     /// Returns a new Lexer from a given byte iterator.
     pub fn new(chars: I, buffer_type: BufferType) -> Lexer<I> {
@@ -170,7 +171,8 @@ enum Mode {
 }
 
 impl<I> Iterator for Lexer<I>
-    where I: IntoIterator<Item = u8>
+where
+    I: IntoIterator<Item = u8>,
 {
     type Item = Token;
 
@@ -229,21 +231,19 @@ impl<I> Iterator for Lexer<I>
                         continue;
                     }
                 }
-                Mode::Number => {
-                    match c {
-                        b'0'...b'9' | b'-' | b'.' => {
-                            if let Some(ref mut v) = buf {
-                                v.push(c);
-                            }
-                            continue;
-                        }
-                        _ => {
-                            t = Some(TokenType::Number);
-                            self.put_back(c);
-                            break;
-                        }
-                    }
-                }
+                Mode::Number => match c {
+                    b'0'...b'9' | b'-' | b'.' => {
+                        if let Some(ref mut v) = buf {
+                            v.push(c);
+                        }
+                        continue;
+                    }
+                    _ => {
+                        t = Some(TokenType::Number);
+                        self.put_back(c);
+                        break;
+                    }
+                },
                 Mode::True(ref mut b, ref mut i) => {
                     b[*i] = c;
                     if *i == 3 {
@@ -343,10 +343,10 @@ impl<I> Iterator for Lexer<I>
                             break;
                         }
                         _ => {}
-                    }// end single byte match
-                }// end case SlowPath
-            }// end match state
-        }// end for each byte
+                    } // end single byte match
+                } // end case SlowPath
+            } // end match state
+        } // end for each byte
 
         match t {
             None => None,
@@ -355,19 +355,13 @@ impl<I> Iterator for Lexer<I>
                     None
                 } else {
                     let buf = match (&t, buf) {
-                        (&TokenType::String, Some(b)) |
-                        (&TokenType::Number, Some(b)) => Buffer::MultiByte(b),
-                        _ => {
-                            Buffer::Span(Span {
-                                first: first,
-                                end: self.cursor,
-                            })
-                        }
+                        (&TokenType::String, Some(b)) | (&TokenType::Number, Some(b)) => Buffer::MultiByte(b),
+                        _ => Buffer::Span(Span {
+                            first: first,
+                            end: self.cursor,
+                        }),
                     };
-                    Some(Token {
-                        kind: t,
-                        buf: buf,
-                    })
+                    Some(Token { kind: t, buf: buf })
                 }
             }
         }
diff --git a/src/lib.rs b/src/lib.rs
index 724c597..1ccfb8a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@ mod key_value_filter;
 mod reader;
 mod iter_ext;
 
-pub use lexer::{Lexer, Token, TokenType, Span, BufferType, Buffer};
+pub use lexer::{Buffer, BufferType, Lexer, Span, Token, TokenType};
 pub use key_value_filter::FilterTypedKeyValuePairs;
 pub use reader::TokenReader;
 pub use iter_ext::IteratorExt;
diff --git a/src/reader.rs b/src/reader.rs
index ff102f8..19ff8e7 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -2,7 +2,7 @@ use std::io::{Read, Result, Write};
 use std::cmp;
 use std::ptr;
 
-use super::{Token, TokenType, Buffer};
+use super::{Buffer, Token, TokenType};
 
 fn copy_memory(src: &[u8], dst: &mut [u8]) {
     let len_src = src.len();
@@ -72,17 +72,13 @@ impl<'a, I: IntoIterator<Item = Token>> Read for TokenReader<'a, I> {
                 None => return Ok(buf.len() - bl),
                 Some(t) => {
                     let bytes: &[u8] = match t.kind {
-                        TokenType::String | TokenType::Number => {
-                            match t.buf {
-                                Buffer::MultiByte(ref b) => &b,
-                                Buffer::Span(ref s) => {
-                                    match self.src {
-                                        Some(b) => b[s.first as usize..s.end as usize].as_bytes(),
-                                        None => panic!("Must set source if tokens don't provide byter buffers"),
-                                    }
-                                }
-                            }
-                        }
+                        TokenType::String | TokenType::Number => match t.buf {
+                            Buffer::MultiByte(ref b) => &b,
+                            Buffer::Span(ref s) => match self.src {
+                                Some(b) => b[s.first as usize..s.end as usize].as_bytes(),
+                                None => panic!("Must set source if tokens don't provide byter buffers"),
+                            },
+                        },
                         TokenType::Invalid => "".as_bytes(),
                         _ => t.kind.as_ref().as_bytes(),
                     };
@@ -100,8 +96,8 @@ impl<'a, I: IntoIterator<Item = Token>> Read for TokenReader<'a, I> {
                     return Ok(buf.len());
                 }
             }
-            }// match iter.next()
-        }// end while there are bytes to produce
+            } // match iter.next()
+        } // end while there are bytes to produce
         unreachable!();
     }
 }
diff --git a/tests/filters.rs b/tests/filters.rs
index 7e7dc23..ef6ef4d 100644
--- a/tests/filters.rs
+++ b/tests/filters.rs
@@ -1,19 +1,40 @@
 extern crate json_tools;
 
-use std::io::{self, Read, Cursor};
-use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType, TokenReader, IteratorExt};
+use std::io::{self, Cursor, Read};
+use json_tools::{BufferType, FilterTypedKeyValuePairs, IteratorExt, Lexer, TokenReader, TokenType};
 
 #[test]
 fn filter_null_values() {
-    for &(src, want, count, fcount) in
-        &[(r#"{ "s":null, "s":true, "s":null }"#, r#"{"s":true}"#, 13, 5),
-          (r#"{"s " : null, "s":null, "s":null }"#, r#"{}"#, 13, 2),
-          (r#"{"s":true, "s":null, "s":null }"#, r#"{"s":true}"#, 13, 5),
-          (r#"{"s":true, "s":null "s":null }"#, r#"{"s":true}"#, 12, 5), // invalid is fine
-          (r#"{"s":true,,,, "s":null, "s":null }"#, r#"{"s":true,,,}"#, 16, 8),
-          (r#"{"s":null, "s":null, "s":true }"#, r#"{"s":true}"#, 13, 5),
-          (r#"{"s":true, "s":null, "s":true }"#, r#"{"s":true,"s":true}"#, 13, 9),
-          (r#"{"s":true, "s":null "s":true }"#, r#"{"s":true"s":true}"#, 12, 8)] {
+    for &(src, want, count, fcount) in &[
+        (
+            r#"{ "s":null, "s":true, "s":null }"#,
+            r#"{"s":true}"#,
+            13,
+            5,
+        ),
+        (r#"{"s " : null, "s":null, "s":null }"#, r#"{}"#, 13, 2),
+        (r#"{"s":true, "s":null, "s":null }"#, r#"{"s":true}"#, 13, 5),
+        (r#"{"s":true, "s":null "s":null }"#, r#"{"s":true}"#, 12, 5), // invalid is fine
+        (
+            r#"{"s":true,,,, "s":null, "s":null }"#,
+            r#"{"s":true,,,}"#,
+            16,
+            8,
+        ),
+        (r#"{"s":null, "s":null, "s":true }"#, r#"{"s":true}"#, 13, 5),
+        (
+            r#"{"s":true, "s":null, "s":true }"#,
+            r#"{"s":true,"s":true}"#,
+            13,
+            9,
+        ),
+        (
+            r#"{"s":true, "s":null "s":true }"#,
+            r#"{"s":true"s":true}"#,
+            12,
+            8,
+        ),
+    ] {
         assert_eq!(Lexer::new(src.bytes(), BufferType::Span).count(), count);
         let new_filter = |bt| FilterTypedKeyValuePairs::new(Lexer::new(src.bytes(), bt), TokenType::Null);
         assert_eq!(new_filter(BufferType::Span).count(), fcount);
diff --git a/tests/lexer.rs b/tests/lexer.rs
index 7ea0398..310d146 100644
--- a/tests/lexer.rs
+++ b/tests/lexer.rs
@@ -1,77 +1,80 @@
 extern crate json_tools;
 
-use json_tools::{Lexer, Token, Span, TokenType, BufferType, Buffer};
+use json_tools::{Buffer, BufferType, Lexer, Span, Token, TokenType};
 
 #[test]
 fn string_value() {
     let src = r#"{ "face": "😂" }"#;
 
     let mut it = Lexer::new(src.bytes(), BufferType::Span);
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::CurlyOpen,
-                   buf: Buffer::Span(Span { first: 0, end: 1 }),
-               }));
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::String,
-                   buf: Buffer::Span(Span { first: 2, end: 8 }),
-               }));
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::Colon,
-                   buf: Buffer::Span(Span { first: 8, end: 9 }),
-               }));
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::String,
-                   buf: Buffer::Span(Span {
-                       first: 10,
-                       end: 16,
-                   }),
-               }));
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::CurlyClose,
-                   buf: Buffer::Span(Span {
-                       first: 17,
-                       end: 18,
-                   }),
-               }));
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::CurlyOpen,
+            buf: Buffer::Span(Span { first: 0, end: 1 }),
+        })
+    );
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::String,
+            buf: Buffer::Span(Span { first: 2, end: 8 }),
+        })
+    );
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::Colon,
+            buf: Buffer::Span(Span { first: 8, end: 9 }),
+        })
+    );
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::String,
+            buf: Buffer::Span(Span { first: 10, end: 16 }),
+        })
+    );
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::CurlyClose,
+            buf: Buffer::Span(Span { first: 17, end: 18 }),
+        })
+    );
 }
 
-
 #[test]
 fn string_escaping() {
     let src = r#"{"s":"\"in\""}"#;
     let it = Lexer::new(src.bytes(), BufferType::Span);
-    assert_eq!(it.skip(3).next(),
-               Some(Token {
-                   kind: TokenType::String,
-                   buf: Buffer::Span(Span {
-                       first: 5,
-                       end: 13,
-                   }),
-               }));
+    assert_eq!(
+        it.skip(3).next(),
+        Some(Token {
+            kind: TokenType::String,
+            buf: Buffer::Span(Span { first: 5, end: 13 }),
+        })
+    );
 
     // '\"' makes us ignore the beginning of the string, and we never hit the end
     let src = r#"{"s":\"foo"}"#;
     let mut it = Lexer::new(src.bytes(), BufferType::Span);
 
     // this is the '\' character - only valid within a string
-    assert_eq!(it.by_ref().skip(3).next(),
-               Some(Token {
-                   kind: TokenType::Invalid,
-                   buf: Buffer::Span(Span { first: 5, end: 6 }),
-               }));
+    assert_eq!(
+        it.by_ref().skip(3).next(),
+        Some(Token {
+            kind: TokenType::Invalid,
+            buf: Buffer::Span(Span { first: 5, end: 6 }),
+        })
+    );
 
     // now comes the string
-    assert_eq!(it.next(),
-               Some(Token {
-                   kind: TokenType::String,
-                   buf: Buffer::Span(Span {
-                       first: 6,
-                       end: 11,
-                   }),
-               }));
+    assert_eq!(
+        it.next(),
+        Some(Token {
+            kind: TokenType::String,
+            buf: Buffer::Span(Span { first: 6, end: 11 }),
+        })
+    );
     assert!(it.next().is_some()); // last one hit the end already
@@ -85,11 +88,13 @@ fn unclosed_string_value() {
     let mut it = Lexer::new(src.bytes(), BufferType::Span);
 
     // unclosed strings are invalid
-    assert_eq!(it.by_ref().skip(3).next(),
-               Some(Token {
-                   kind: TokenType::Invalid,
-                   buf: Buffer::Span(Span { first: 5, end: 8 }),
-               }));
+    assert_eq!(
+        it.by_ref().skip(3).next(),
+        Some(Token {
+            kind: TokenType::Invalid,
+            buf: Buffer::Span(Span { first: 5, end: 8 }),
+        })
+    );
 }
 
 #[test]
@@ -97,52 +102,52 @@ fn backslash_escapes_backslash_in_string_value() {
     let src = r#"{"s":"f\\"}"#;
     let mut it = Lexer::new(src.bytes(), BufferType::Span);
 
-    assert_eq!(it.by_ref().skip(3).next(),
-               Some(Token {
-                   kind: TokenType::String,
-                   buf: Buffer::Span(Span {
-                       first: 5,
-                       end: 10,
-                   }),
-               }));
+    assert_eq!(
+        it.by_ref().skip(3).next(),
+        Some(Token {
+            kind: TokenType::String,
+            buf: Buffer::Span(Span { first: 5, end: 10 }),
+        })
+    );
 
     let src = r#"{"s":"f\"}"#;
     let mut it = Lexer::new(src.bytes(), BufferType::Span);
 
-    assert_eq!(it.by_ref().skip(3).next(),
-               Some(Token {
-                   kind: TokenType::Invalid,
-                   buf: Buffer::Span(Span {
-                       first: 5,
-                       end: 10,
-                   }),
-               }));
+    assert_eq!(
+        it.by_ref().skip(3).next(),
+        Some(Token {
+            kind: TokenType::Invalid,
+            buf: Buffer::Span(Span { first: 5, end: 10 }),
+        })
+    );
 }
 
-
 #[test]
 fn special_values_closed_and_unclosed() {
-    for &(src, ref kind, first, end) in
-        &[(r#"{"v":null}"#, TokenType::Null, 5, 9),
-          (r#"{"v":nuxl}"#, TokenType::Invalid, 5, 9),
-          (r#"{"v":true}"#, TokenType::BooleanTrue, 5, 9),
-          (r#"{"v":trux}"#, TokenType::Invalid, 5, 9),
-          (r#"{"v":false}"#, TokenType::BooleanFalse, 5, 10),
-          (r#"{"v":falsze}"#, TokenType::Invalid, 5, 10),
-          (r#"{"v":123}"#, TokenType::Number, 5, 8),
-          (r#"{"v":-123}"#, TokenType::Number, 5, 9),
-          (r#"{"v":1.23}"#, TokenType::Number, 5, 9),
-          (r#"{"v":-1.23}"#, TokenType::Number, 5, 10),
-          (r#"{"v":1.}"#, TokenType::Number, 5, 7),
-          (r#"{"v":.}"#, TokenType::Number, 5, 6)] {
-        assert_eq!(Lexer::new(src.bytes(), BufferType::Span).skip(3).next(),
-                   Some(Token {
-                       kind: kind.clone(),
-                       buf: Buffer::Span(Span {
-                           first: first,
-                           end: end,
-                       }),
-                   }));
+    for &(src, ref kind, first, end) in &[
+        (r#"{"v":null}"#, TokenType::Null, 5, 9),
+        (r#"{"v":nuxl}"#, TokenType::Invalid, 5, 9),
+        (r#"{"v":true}"#, TokenType::BooleanTrue, 5, 9),
+        (r#"{"v":trux}"#, TokenType::Invalid, 5, 9),
+        (r#"{"v":false}"#, TokenType::BooleanFalse, 5, 10),
+        (r#"{"v":falsze}"#, TokenType::Invalid, 5, 10),
+        (r#"{"v":123}"#, TokenType::Number, 5, 8),
+        (r#"{"v":-123}"#, TokenType::Number, 5, 9),
+        (r#"{"v":1.23}"#, TokenType::Number, 5, 9),
+        (r#"{"v":-1.23}"#, TokenType::Number, 5, 10),
+        (r#"{"v":1.}"#, TokenType::Number, 5, 7),
+        (r#"{"v":.}"#, TokenType::Number, 5, 6),
+    ] {
+        assert_eq!(
+            Lexer::new(src.bytes(), BufferType::Span).skip(3).next(),
+            Some(Token {
+                kind: kind.clone(),
+                buf: Buffer::Span(Span {
+                    first: first,
+                    end: end,
+                }),
+            })
+        );
     }
 }
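Reviewer note: this appears to be a pure formatting pass (rustfmt-style layout plus alphabetized import lists); no token kinds, spans, or public signatures change. As a quick sanity check that the reordered re-exports still compose, the sketch below — not part of the patch; the input and expected output are borrowed from the first case in tests/filters.rs — chains the three pieces this diff touches: Lexer -> FilterTypedKeyValuePairs (via IteratorExt::filter_key_value_by_type) -> TokenReader (via IteratorExt::reader):

    extern crate json_tools;

    use json_tools::{BufferType, IteratorExt, Lexer, TokenType};
    use std::io::Read;

    fn main() {
        let src = r#"{ "s":null, "s":true, "s":null }"#;
        // Lex the source, drop every `"key": null` pair at the token level,
        // then serialize the surviving tokens back out through the `Read` adapter.
        let mut out = String::new();
        Lexer::new(src.bytes(), BufferType::Span)
            .filter_key_value_by_type(TokenType::Null)
            .reader(Some(src))
            .read_to_string(&mut out)
            .unwrap();
        assert_eq!(out, r#"{"s":true}"#);
    }

With BufferType::Span the tokens carry only offsets into the source, so the reader must be handed the original string (Some(src)); with BufferType::Bytes the tokens own their bytes and None suffices, as the benchmarks above exercise.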