simplification and modernization
Byron committed Feb 1, 2020
1 parent 520763f commit 4f168d4
Showing 10 changed files with 37 additions and 115 deletions.
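Most of the changes below adopt Rust 2018 idioms: an edition = "2018" entry in Cargo.toml, inclusive range patterns written ..= instead of the deprecated ..., the ? operator instead of try!, field-init shorthand, and grouped use declarations. A minimal sketch of these idioms, assuming nothing beyond the standard library; every name here is a placeholder, not code from this repository:

use std::io::{self, Read, Write}; // grouped imports, as adopted in src/reader.rs below

struct Config {
    capacity: usize,
}

fn make(capacity: usize) -> Config {
    Config { capacity } // field-init shorthand instead of `capacity: capacity`
}

fn is_hex_digit(b: u8) -> bool {
    match b {
        // `..=` is the edition-2018 spelling of the old `...` inclusive range pattern
        b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => true,
        _ => false,
    }
}

fn drain(mut reader: impl Read, mut writer: impl Write) -> io::Result<u64> {
    let n = io::copy(&mut reader, &mut writer)?; // `?` replaces the old try!(...) macro
    Ok(n)
}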
6 changes: 0 additions & 6 deletions .clog.toml

This file was deleted.

4 changes: 3 additions & 1 deletion Cargo.toml
@@ -1,9 +1,11 @@
[package]
name = "json-tools"
version = "1.1.0"
edition = "2018"
version = "1.1.1"
authors = ["Sebastian Thiel <[email protected]>"]
license = "MIT/Apache-2.0"
description = "A zero-copy json-lexer, filters and serializer."
repository = "https://github.com/Byron/json-tools"
keywords = ["json", "tools", "lexer"]
documentation = "https://docs.rs/crate/json-tools"
exclude = ["tests", "benches", "rustfmt.toml"]
32 changes: 6 additions & 26 deletions benches/usage.rs
@@ -181,10 +181,7 @@ fn span_lexer_throughput(b: &mut test::Bencher) {
#[bench]
fn span_lexer_span_token_reader_throughput(b: &mut test::Bencher) {
b.iter(|| {
let mut r = TokenReader::new(
Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
Some(NULL_RIDDEN),
);
let mut r = TokenReader::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span), Some(NULL_RIDDEN));
io::copy(&mut r, &mut io::sink()).ok();
});
b.bytes = NULL_RIDDEN.len() as u64;
@@ -193,10 +190,7 @@ fn span_lexer_span_token_reader_throughput(b: &mut test::Bencher) {
#[bench]
fn span_lexer_bytes_token_reader_throughput(b: &mut test::Bencher) {
b.iter(|| {
let mut r = TokenReader::new(
Lexer::new(NULL_RIDDEN.bytes(), BufferType::Bytes(128)),
None,
);
let mut r = TokenReader::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Bytes(128)), None);
io::copy(&mut r, &mut io::sink()).ok();
});
b.bytes = NULL_RIDDEN.len() as u64;
@@ -206,10 +200,7 @@ fn span_lexer_bytes_token_reader_throughput(b: &mut test::Bencher) {
fn bytes_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
let mut ncb = 0u64;
b.iter(|| {
let mut r = TokenReader::new(
KeyValueProducer::new(BufferType::Bytes(0)).take(30000),
None,
);
let mut r = TokenReader::new(KeyValueProducer::new(BufferType::Bytes(0)).take(30000), None);
ncb = io::copy(&mut r, &mut io::sink()).unwrap();
});
b.bytes = ncb;
@@ -219,10 +210,7 @@ fn bytes_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
fn span_token_producer_bytes_token_reader_throughput(b: &mut test::Bencher) {
let mut ncb = 0u64;
b.iter(|| {
let mut r = TokenReader::new(
KeyValueProducer::new(BufferType::Span).take(30000),
Some(KEY_VALUE_SRC),
);
let mut r = TokenReader::new(KeyValueProducer::new(BufferType::Span).take(30000), Some(KEY_VALUE_SRC));
ncb = io::copy(&mut r, &mut io::sink()).unwrap();
});
b.bytes = ncb;
@@ -242,10 +230,7 @@ fn bytes_lexer_throughput(b: &mut test::Bencher) {
#[bench]
fn span_filter_null_throughput(b: &mut test::Bencher) {
b.iter(|| {
let f = FilterTypedKeyValuePairs::new(
Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
TokenType::Null,
);
let f = FilterTypedKeyValuePairs::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span), TokenType::Null);
for t in f {
test::black_box(t);
}
@@ -258,12 +243,7 @@ fn span_lexer_throughput_with_cursor(b: &mut test::Bencher) {
use std::io::{Cursor, Read};

b.iter(|| {
let it = Lexer::new(
Cursor::new(NULL_RIDDEN.as_bytes())
.bytes()
.filter_map(|r| r.ok()),
BufferType::Span,
);
let it = Lexer::new(Cursor::new(NULL_RIDDEN.as_bytes()).bytes().filter_map(|r| r.ok()), BufferType::Span);
for t in it {
test::black_box(t);
}
30 changes: 0 additions & 30 deletions etc/sublime-text/json-tools.sublime-project

This file was deleted.

8 changes: 4 additions & 4 deletions src/key_value_filter.rs
@@ -112,11 +112,11 @@ where
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon_candidate);
return res;
}// end is colon token
}// end have token (colon?)
} // end is colon token
} // end have token (colon?)
None => return first_token(&mut self.buf, first_str_token),
}// end match next token (colon?)
}// end is string token,
} // end match next token (colon?)
} // end is string token,
TokenType::Comma => {
// NOTE: in case of malformed ,,,,, sequences, we just consider
// this a peek, return the previous comma, and put back this one
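For reference, FilterTypedKeyValuePairs is exercised in benches/usage.rs and tests/filters.rs within this same commit; a small sketch built on one of those test inputs, with the printed summary purely illustrative:

use json_tools::{BufferType, FilterTypedKeyValuePairs, Lexer, TokenType};

fn main() {
    let src = r#"{ "s":null, "s":true, "s":null }"#;
    // Drop every key/value pair whose value token is Null from the token stream.
    let filtered = FilterTypedKeyValuePairs::new(Lexer::new(src.bytes(), BufferType::Span), TokenType::Null);
    // Per tests/filters.rs, 13 tokens go in and 5 come out, serializing back to {"s":true}.
    println!("{} tokens remain", filtered.count());
}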
19 changes: 8 additions & 11 deletions src/lexer.rs
@@ -106,7 +106,7 @@ pub enum Buffer {
#[derive(Debug, PartialEq, Clone)]
pub enum BufferType {
/// Use a `Buffer::MultiByte` were appropriate. Initialize it with the
/// given capcity (to obtain higher performance when pushing charcters)
/// given capacity (to obtain higher performance when pushing characters)
Bytes(usize),
Span,
}
@@ -121,7 +121,7 @@ where
chars: chars.into_iter(),
next_byte: None,
cursor: 0,
buffer_type: buffer_type,
buffer_type,
}
}

@@ -176,7 +176,7 @@ where
{
type Item = Token;

/// Lex the underlying bytte stream to generate tokens
/// Lex the underlying byte stream to generate tokens
fn next(&mut self) -> Option<Token> {
let mut t: Option<TokenType> = None;

@@ -217,7 +217,7 @@ where
}
if *ign_digits > 0 {
match c {
b'0'...b'9' | b'A'...b'F' | b'a'...b'f' => {
b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
*ign_digits -= 1;
continue;
}
@@ -257,7 +257,7 @@ where
}
}
Mode::Number => match c {
b'0'...b'9' | b'-' | b'+' | b'.' | b'E' | b'e' => {
b'0'..=b'9' | b'-' | b'+' | b'.' | b'E' | b'e' => {
if let Some(ref mut v) = buf {
v.push(c);
}
@@ -325,7 +325,7 @@ where
state = Mode::Null([c, b'x', b'x', b'x'], 1);
set_cursor(self.cursor);
}
b'0'...b'9' | b'-' | b'.' => {
b'0'..=b'9' | b'-' | b'.' => {
state = Mode::Number;
if let Some(ref mut v) = buf {
v.push(c);
@@ -381,12 +381,9 @@ where
} else {
let buf = match (&t, buf) {
(&TokenType::String, Some(b)) | (&TokenType::Number, Some(b)) => Buffer::MultiByte(b),
_ => Buffer::Span(Span {
first: first,
end: self.cursor,
}),
_ => Buffer::Span(Span { first, end: self.cursor }),
};
Some(Token { kind: t, buf: buf })
Some(Token { kind: t, buf })
}
}
}
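For context, a sketch of how the lexer is driven, mirroring the benchmarks in this commit; the JSON literal is made up:

use json_tools::{Buffer, BufferType, Lexer};

fn main() {
    let src = r#"{"answer": 42}"#;
    // Span mode: tokens carry byte offsets into the source rather than copied bytes.
    for token in Lexer::new(src.bytes(), BufferType::Span) {
        if let Buffer::Span(span) = &token.buf {
            println!("{:?} at {}..{}", token.kind, span.first, span.end);
        }
    }
    // Bytes mode: string and number payloads are copied into an owned buffer instead.
    for token in Lexer::new(src.bytes(), BufferType::Bytes(128)) {
        if let Buffer::MultiByte(bytes) = &token.buf {
            println!("{} payload bytes", bytes.len());
        }
    }
}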
8 changes: 4 additions & 4 deletions src/lib.rs
@@ -1,10 +1,10 @@
//! For usage examples, please have a look at the *tests* and *benchmarks*.
mod lexer;
mod iter_ext;
mod key_value_filter;
mod lexer;
mod reader;
mod iter_ext;

pub use lexer::{Buffer, BufferType, Lexer, Span, Token, TokenType};
pub use iter_ext::IteratorExt;
pub use key_value_filter::FilterTypedKeyValuePairs;
pub use lexer::{Buffer, BufferType, Lexer, Span, Token, TokenType};
pub use reader::TokenReader;
pub use iter_ext::IteratorExt;
10 changes: 6 additions & 4 deletions src/reader.rs
@@ -1,6 +1,8 @@
use std::io::{Read, Result, Write};
use std::cmp;
use std::ptr;
use std::{
cmp,
io::{Read, Result, Write},
ptr,
};

use super::{Buffer, Token, TokenType};

@@ -89,7 +91,7 @@ impl<'a, I: IntoIterator<Item = Token>> Read for TokenReader<'a, I> {

if btc < bytes.len() {
debug_assert!(bl == 0);
try!(self.buf.write_all(&bytes[btc..]));
self.buf.write_all(&bytes[btc..])?;
}

if bl == 0 {
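And the round trip exercised by the benchmarks above: serialize the token stream back into bytes through TokenReader's Read implementation. A sketch with an illustrative input; as in the benchmarks, Span tokens need the original source handed in as Some(...):

use std::io;

use json_tools::{BufferType, Lexer, TokenReader};

fn main() -> io::Result<()> {
    let src = r#"{ "s":null, "s":true }"#;
    // Span tokens only store offsets, so the reader needs the source text to resolve them.
    let mut reader = TokenReader::new(Lexer::new(src.bytes(), BufferType::Span), Some(src));
    let written = io::copy(&mut reader, &mut io::sink())?;
    println!("re-serialized {} bytes", written);
    Ok(())
}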
30 changes: 5 additions & 25 deletions tests/filters.rs
@@ -1,39 +1,19 @@
extern crate json_tools;

use std::io::{self, Cursor, Read};
use json_tools::{BufferType, FilterTypedKeyValuePairs, IteratorExt, Lexer, TokenReader, TokenType};
use std::io::{self, Cursor, Read};

#[test]
fn filter_null_values() {
for &(src, want, count, fcount) in &[
(
r#"{ "s":null, "s":true, "s":null }"#,
r#"{"s":true}"#,
13,
5,
),
(r#"{ "s":null, "s":true, "s":null }"#, r#"{"s":true}"#, 13, 5),
(r#"{"s " : null, "s":null, "s":null }"#, r#"{}"#, 13, 2),
(r#"{"s":true, "s":null, "s":null }"#, r#"{"s":true}"#, 13, 5),
(r#"{"s":true, "s":null "s":null }"#, r#"{"s":true}"#, 12, 5), // invalid is fine
(
r#"{"s":true,,,, "s":null, "s":null }"#,
r#"{"s":true,,,}"#,
16,
8,
),
(r#"{"s":true,,,, "s":null, "s":null }"#, r#"{"s":true,,,}"#, 16, 8),
(r#"{"s":null, "s":null, "s":true }"#, r#"{"s":true}"#, 13, 5),
(
r#"{"s":true, "s":null, "s":true }"#,
r#"{"s":true,"s":true}"#,
13,
9,
),
(
r#"{"s":true, "s":null "s":true }"#,
r#"{"s":true"s":true}"#,
12,
8,
),
(r#"{"s":true, "s":null, "s":true }"#, r#"{"s":true,"s":true}"#, 13, 9),
(r#"{"s":true, "s":null "s":true }"#, r#"{"s":true"s":true}"#, 12, 8),
] {
assert_eq!(Lexer::new(src.bytes(), BufferType::Span).count(), count);
let new_filter = |bt| FilterTypedKeyValuePairs::new(Lexer::new(src.bytes(), bt), TokenType::Null);
5 changes: 1 addition & 4 deletions tests/lexer.rs
@@ -195,10 +195,7 @@ fn special_values_closed_and_unclosed() {
Lexer::new(src.bytes(), BufferType::Span).skip(3).next(),
Some(Token {
kind: kind.clone(),
buf: Buffer::Span(Span {
first: first,
end: end,
}),
buf: Buffer::Span(Span { first: first, end: end }),
})
);
}
