Skip to content

Commit

Permalink
imp(null-filter): make it general
Browse files Browse the repository at this point in the history
* What was formerly known as FilterNull can now filter out all key-value
  pairs whose value is of a given token type.
  That way, null values can be filtered out, as well as numbers, for example.
* Turned certain dual-branch matches into if-clauses, which moved everything
  further to the left again, while making the code easier to read.
  • Loading branch information
Byron committed May 7, 2015
1 parent 08ad49b commit 431f051
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 58 deletions.
7 changes: 4 additions & 3 deletions benches/usage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
extern crate json_tools;
extern crate test;

use json_tools::{Lexer, FilterNull, BufferType};
use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType};

const NULL_RIDDEN: &'static str = r##"
{
Expand Down Expand Up @@ -138,7 +138,8 @@ fn bytes_lexer_throughput_in_bytes(b: &mut test::Bencher) {
#[bench]
fn span_filter_null_throughput_in_bytes(b: &mut test::Bencher) {
b.iter(|| {
let f = FilterNull::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span));
let f = FilterTypedKeyValuePairs::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
TokenType::Null);
for t in f {
test::black_box(t);
}
Expand Down Expand Up @@ -173,7 +174,7 @@ fn span_lexer_throughput_in_bytes_with_cursor_and_tee(b: &mut test::Bencher) {
.tee(&mut keeper)
.bytes()
.filter_map(|r|r.ok()),
BufferType::Bytes(128));
BufferType::Span);
for t in it {
test::black_box(t);
}
Expand Down
99 changes: 48 additions & 51 deletions src/filter_null.rs → src/key_value_filter.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
use super::{Token, TokenType};
use std::collections::VecDeque;

/// Removes tokens matching `,? "key": null ,?`.
/// Removes tokens matching `,? "key": <type> ,?`, where `<type>` is a given
/// token type. Useful for removing `null` values, or all numbers, for instance.
/// Implemented in a resilient fashion that doesn't require a sane input token stream.
pub struct FilterNull<I: Iterator<Item=Token>> {
pub struct FilterTypedKeyValuePairs<I: Iterator<Item=Token>> {
src: I,
// NOTE: We could remove the deck and keep the 3 slots we need as Option<Token>
// 0: optional comma
// 1: first string
// 2: colon
buf: VecDeque<Token>,
next_token: Option<Token>
next_token: Option<Token>,
value_type: TokenType,
}

impl<I: Iterator<Item=Token>> FilterNull<I> {
/// Returns a new `FilterNull` instance from a `Token` iterator
pub fn new(src: I) -> FilterNull<I> {
FilterNull {
impl<I: Iterator<Item=Token>> FilterTypedKeyValuePairs<I> {
/// Returns a new `FilterTypedKeyValuePairs` instance from a `Token` iterator
pub fn new(src: I, value_type: TokenType) -> FilterTypedKeyValuePairs<I> {
FilterTypedKeyValuePairs {
src: src,
buf: VecDeque::with_capacity(3),
next_token: None
next_token: None,
value_type: value_type
}
}

Expand All @@ -36,7 +39,7 @@ impl<I: Iterator<Item=Token>> FilterNull<I> {
}
}

impl<I> Iterator for FilterNull<I> where I: Iterator<Item=Token>{
impl<I> Iterator for FilterTypedKeyValuePairs<I> where I: Iterator<Item=Token>{
type Item = Token;

fn next(&mut self) -> Option<Token> {
Expand All @@ -63,56 +66,50 @@ impl<I> Iterator for FilterNull<I> where I: Iterator<Item=Token>{
let first_str_token = first_str_candidate;
match self.next_token() {
Some(colon_candidate) => {
match colon_candidate.kind {
TokenType::Colon => {
let colon = colon_candidate;
match self.next_token() {
Some(second_str_candidate) => {
match second_str_candidate.kind {
TokenType::Null => {
// WE HAVE A STR : STR triplet, and we forget it
// This works by just not putting it onto the ringbuffer
// See if there is a (optional) comma
// If self.buf has anything, it must be a comma !
// It is only 0 or 1 !
match self.next_token() {
Some(comma_candidate) => {
first_str_candidate =
match match comma_candidate.kind {
TokenType::Comma => self.next_token(),
_ => {
self.buf.pop_front();
Some(comma_candidate)
}
} {
Some(t) => t,
None => return None,
};
continue;
},
None => return None,
}
if colon_candidate.kind == TokenType::Colon {
let colon = colon_candidate;
match self.next_token() {
Some(second_str_candidate) => {
if second_str_candidate.kind == self.value_type {
// WE HAVE A STR : STR triplet, and we forget it
// This works by just not putting it onto the ringbuffer
// See if there is a (optional) comma
// If self.buf has anything, it must be a comma !
// It is only 0 or 1 !
match self.next_token() {
Some(comma_candidate) => {
first_str_candidate =
match match comma_candidate.kind {
TokenType::Comma => self.next_token(),
_ => {
self.buf.pop_front();
Some(comma_candidate)
}
} {
Some(t) => t,
None => return None,
};
continue;
},
_ => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
self.buf.push_back(second_str_candidate);
return res
}
None => return None,
}
},
None => {
} else {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
self.buf.push_back(second_str_candidate);
return res
}
},
None => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
return res
}
},
_ => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon_candidate);
return res
}
} else {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon_candidate);
return res
}// end is colon token
},// end have token (colon?)
None => return first_token(&mut self.buf, first_str_token),
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mod lexer;
mod filter_null;
mod key_value_filter;

pub use lexer::{Lexer, Token, TokenType, Span, BufferType, Buffer};
pub use filter_null::{FilterNull};
pub use key_value_filter::{FilterTypedKeyValuePairs};
5 changes: 3 additions & 2 deletions tests/filters.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
extern crate json_tools;

use json_tools::{Lexer, FilterNull, BufferType};
use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType};

#[test]
fn filter_null_values() {
Expand All @@ -13,6 +13,7 @@ fn filter_null_values() {
(r#"{"s":true, "s":null, "s":true }"#, 13, 9),
(r#"{"s":true, "s":null "s":true }"#, 12, 8),] {
assert_eq!(Lexer::new(src.bytes(), BufferType::Span).count(), count);
assert_eq!(FilterNull::new(Lexer::new(src.bytes(), BufferType::Span)).count(), fcount);
assert_eq!(FilterTypedKeyValuePairs::new(Lexer::new(src.bytes(), BufferType::Span),
TokenType::Null).count(), fcount);
}
}

0 comments on commit 431f051

Please sign in to comment.