Skip to content

Commit

Permalink
imp(null-filter): make it general
Browse files Browse the repository at this point in the history
* What was formerly known as FilterNull can now filter out all key-value
  pairs whose value is of a given token type.
  That way, null values can be filtered out, as well as numbers, for example.
* Turned certain dual-branch matches into if-clauses, which moved everything
  further to the left again, while making the code easier to read.
  • Loading branch information
Byron committed May 7, 2015
1 parent 08ad49b commit 431f051
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 58 deletions.
7 changes: 4 additions & 3 deletions benches/usage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
extern crate json_tools;
extern crate test;

use json_tools::{Lexer, FilterNull, BufferType};
use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType};

const NULL_RIDDEN: &'static str = r##"
{
Expand Down Expand Up @@ -138,7 +138,8 @@ fn bytes_lexer_throughput_in_bytes(b: &mut test::Bencher) {
#[bench]
fn span_filter_null_throughput_in_bytes(b: &mut test::Bencher) {
b.iter(|| {
let f = FilterNull::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span));
let f = FilterTypedKeyValuePairs::new(Lexer::new(NULL_RIDDEN.bytes(), BufferType::Span),
TokenType::Null);
for t in f {
test::black_box(t);
}
Expand Down Expand Up @@ -173,7 +174,7 @@ fn span_lexer_throughput_in_bytes_with_cursor_and_tee(b: &mut test::Bencher) {
.tee(&mut keeper)
.bytes()
.filter_map(|r|r.ok()),
BufferType::Bytes(128));
BufferType::Span);
for t in it {
test::black_box(t);
}
Expand Down
99 changes: 48 additions & 51 deletions src/filter_null.rs → src/key_value_filter.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
use super::{Token, TokenType};
use std::collections::VecDeque;

/// Removes tokens matching `,? "key": null ,?`.
/// Removes tokens matching `,? "key": <type> ,?`, where `<type>` is a given
/// token type. Useful for removing `null` values, or all numbers, for instance.
/// Implemented in a resilient fashion that doesn't require a sane input token stream.
pub struct FilterNull<I: Iterator<Item=Token>> {
pub struct FilterTypedKeyValuePairs<I: Iterator<Item=Token>> {
src: I,
// NOTE: We could remove the deck and keep the 3 slots we need as Option<Token>
// 0: optional comma
// 1: first string
// 2: colon
buf: VecDeque<Token>,
next_token: Option<Token>
next_token: Option<Token>,
value_type: TokenType,
}

impl<I: Iterator<Item=Token>> FilterNull<I> {
/// Returns a new `FilterNull` instance from a `Token` iterator
pub fn new(src: I) -> FilterNull<I> {
FilterNull {
impl<I: Iterator<Item=Token>> FilterTypedKeyValuePairs<I> {
/// Returns a new `FilterTypedKeyValuePairs` instance from a `Token` iterator
pub fn new(src: I, value_type: TokenType) -> FilterTypedKeyValuePairs<I> {
FilterTypedKeyValuePairs {
src: src,
buf: VecDeque::with_capacity(3),
next_token: None
next_token: None,
value_type: value_type
}
}

Expand All @@ -36,7 +39,7 @@ impl<I: Iterator<Item=Token>> FilterNull<I> {
}
}

impl<I> Iterator for FilterNull<I> where I: Iterator<Item=Token>{
impl<I> Iterator for FilterTypedKeyValuePairs<I> where I: Iterator<Item=Token>{
type Item = Token;

fn next(&mut self) -> Option<Token> {
Expand All @@ -63,56 +66,50 @@ impl<I> Iterator for FilterNull<I> where I: Iterator<Item=Token>{
let first_str_token = first_str_candidate;
match self.next_token() {
Some(colon_candidate) => {
match colon_candidate.kind {
TokenType::Colon => {
let colon = colon_candidate;
match self.next_token() {
Some(second_str_candidate) => {
match second_str_candidate.kind {
TokenType::Null => {
// WE HAVE A STR : STR triplet, and we forget it
// This works by just not putting it onto the ringbuffer
// See if there is a (optional) comma
// If self.buf has anything, it must be a comma !
// It is only 0 or 1 !
match self.next_token() {
Some(comma_candidate) => {
first_str_candidate =
match match comma_candidate.kind {
TokenType::Comma => self.next_token(),
_ => {
self.buf.pop_front();
Some(comma_candidate)
}
} {
Some(t) => t,
None => return None,
};
continue;
},
None => return None,
}
if colon_candidate.kind == TokenType::Colon {
let colon = colon_candidate;
match self.next_token() {
Some(second_str_candidate) => {
if second_str_candidate.kind == self.value_type {
// WE HAVE A STR : STR triplet, and we forget it
// This works by just not putting it onto the ringbuffer
// See if there is a (optional) comma
// If self.buf has anything, it must be a comma !
// It is only 0 or 1 !
match self.next_token() {
Some(comma_candidate) => {
first_str_candidate =
match match comma_candidate.kind {
TokenType::Comma => self.next_token(),
_ => {
self.buf.pop_front();
Some(comma_candidate)
}
} {
Some(t) => t,
None => return None,
};
continue;
},
_ => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
self.buf.push_back(second_str_candidate);
return res
}
None => return None,
}
},
None => {
} else {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
self.buf.push_back(second_str_candidate);
return res
}
},
None => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon);
return res
}
},
_ => {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon_candidate);
return res
}
} else {
let res = first_token(&mut self.buf, first_str_token);
self.buf.push_back(colon_candidate);
return res
}// end is colon token
},// end have token (colon?)
None => return first_token(&mut self.buf, first_str_token),
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mod lexer;
mod filter_null;
mod key_value_filter;

pub use lexer::{Lexer, Token, TokenType, Span, BufferType, Buffer};
pub use filter_null::{FilterNull};
pub use key_value_filter::{FilterTypedKeyValuePairs};
5 changes: 3 additions & 2 deletions tests/filters.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
extern crate json_tools;

use json_tools::{Lexer, FilterNull, BufferType};
use json_tools::{Lexer, FilterTypedKeyValuePairs, BufferType, TokenType};

#[test]
fn filter_null_values() {
Expand All @@ -13,6 +13,7 @@ fn filter_null_values() {
(r#"{"s":true, "s":null, "s":true }"#, 13, 9),
(r#"{"s":true, "s":null "s":true }"#, 12, 8),] {
assert_eq!(Lexer::new(src.bytes(), BufferType::Span).count(), count);
assert_eq!(FilterNull::new(Lexer::new(src.bytes(), BufferType::Span)).count(), fcount);
assert_eq!(FilterTypedKeyValuePairs::new(Lexer::new(src.bytes(), BufferType::Span),
TokenType::Null).count(), fcount);
}
}

0 comments on commit 431f051

Please sign in to comment.