use std::{
    any::TypeId,
    hash::{Hash, Hasher},
};

/// A value that is either a string or one of four primitive numeric types.
///
/// `PartialEq` is derived, so the float variants compare with IEEE-754
/// semantics: `0.0 == -0.0` and `NaN != NaN`.
#[derive(Debug, Clone, PartialEq)]
pub enum AnyValType {
    /// An owned string.
    Str(String),
    /// A 32-bit signed integer.
    I32(i32),
    /// A 64-bit signed integer.
    I64(i64),
    /// A 32-bit float.
    F32(f32),
    /// A 64-bit float.
    F64(f64),
}

// Defaults

impl AnyValType {
    /// Returns a `Str` holding the empty string.
    pub fn default_str() -> Self { AnyValType::Str(String::new()) }

    /// Returns `I32(0)`.
    pub fn default_i32() -> Self { AnyValType::I32(0) }

    /// Returns `I64(0)`.
    pub fn default_i64() -> Self { AnyValType::I64(0) }

    /// Returns `F32(0.0)`.
    pub fn default_f32() -> Self { AnyValType::F32(0.0) }

    /// Returns `F64(0.0)`.
    pub fn default_f64() -> Self { AnyValType::F64(0.0) }
}

// Parsing

impl AnyValType {
    /// Wraps a copy of `v` in a `Str`. Never fails.
    pub fn parse_str(v: &str) -> Self { AnyValType::Str(v.to_string()) }

    /// Parses `v` as an `i32`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `i32`.
    pub fn parse_i32(v: &str) -> Self {
        AnyValType::I32(v.parse().expect("parse_i32: input is not a valid i32"))
    }

    /// Parses `v` as an `i64`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `i64`.
    pub fn parse_i64(v: &str) -> Self {
        AnyValType::I64(v.parse().expect("parse_i64: input is not a valid i64"))
    }

    /// Parses `v` as an `f32`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `f32`.
    pub fn parse_f32(v: &str) -> Self {
        AnyValType::F32(v.parse().expect("parse_f32: input is not a valid f32"))
    }

    /// Parses `v` as an `f64`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `f64`.
    pub fn parse_f64(v: &str) -> Self {
        AnyValType::F64(v.parse().expect("parse_f64: input is not a valid f64"))
    }

    /// Parses `v` into the same variant as `self`; the payload of `self` is
    /// ignored and only selects the target type.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a numeric variant and `v` does not parse as that
    /// numeric type.
    pub fn parse_into_self(&self, v: &str) -> Self {
        match self {
            | AnyValType::Str(_) => AnyValType::parse_str(v),
            | AnyValType::I32(_) => AnyValType::parse_i32(v),
            | AnyValType::I64(_) => AnyValType::parse_i64(v),
            | AnyValType::F32(_) => AnyValType::parse_f32(v),
            | AnyValType::F64(_) => AnyValType::parse_f64(v),
        }
    }
}

// To Methods

impl AnyValType {
    /// Returns the inner string if this is a `Str`, otherwise `None`.
    pub fn to_string(&self) -> Option<&String> {
        match self {
            | AnyValType::Str(v) => Some(v),
            | _ => None,
        }
    }

    /// Returns the inner value if this is an `I32`, otherwise `None`.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            | AnyValType::I32(v) => Some(*v),
            | _ => None,
        }
    }

    /// Returns the inner value if this is an `I64`, otherwise `None`.
    pub fn to_i64(&self) -> Option<i64> {
        match self {
            | AnyValType::I64(v) => Some(*v),
            | _ => None,
        }
    }

    /// Returns the inner value if this is an `F32`, otherwise `None`.
    pub fn to_f32(&self) -> Option<f32> {
        match self {
            | AnyValType::F32(v) => Some(*v),
            | _ => None,
        }
    }

    /// Returns the inner value if this is an `F64`, otherwise `None`.
    pub fn to_f64(&self) -> Option<f64> {
        match self {
            | AnyValType::F64(v) => Some(*v),
            | _ => None,
        }
    }
}

// From Methods

impl From<&str> for AnyValType {
    fn from(v: &str) -> Self { AnyValType::Str(v.to_string()) }
}

impl From<String> for AnyValType {
    fn from(v: String) -> Self { AnyValType::Str(v) }
}

impl From<i32> for AnyValType {
    fn from(v: i32) -> Self { AnyValType::I32(v) }
}

impl From<i64> for AnyValType {
    fn from(v: i64) -> Self { AnyValType::I64(v) }
}

impl From<f32> for AnyValType {
    fn from(v: f32) -> Self { AnyValType::F32(v) }
}

impl From<f64> for AnyValType {
    fn from(v: f64) -> Self { AnyValType::F64(v) }
}

/// Hashes the payload together with the `TypeId` of its primitive type, so
/// that e.g. `I32(1)` and `I64(1)` feed different data to the hasher.
///
/// Floats are hashed through their bit pattern. Both signed zeros are first
/// mapped to the bit pattern of `+0.0`, because the derived `PartialEq`
/// treats `0.0` and `-0.0` as equal and equal values must hash equally.
impl Hash for AnyValType {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            | AnyValType::Str(v) => (TypeId::of::<String>(), v).hash(state),
            | AnyValType::I32(v) => (TypeId::of::<i32>(), v).hash(state),
            | AnyValType::I64(v) => (TypeId::of::<i64>(), v).hash(state),
            | AnyValType::F32(v) => {
                // `*v == 0.0` is true for both +0.0 and -0.0.
                let bits = if *v == 0.0 { 0.0_f32.to_bits() } else { v.to_bits() };
                (TypeId::of::<f32>(), bits).hash(state)
            },
            | AnyValType::F64(v) => {
                // `*v == 0.0` is true for both +0.0 and -0.0.
                let bits = if *v == 0.0 { 0.0_f64.to_bits() } else { v.to_bits() };
                (TypeId::of::<f64>(), bits).hash(state)
            },
        }
    }
}
size(&self) -> usize { self.map.len() } + + fn contains_key(&self, key: &u64) -> bool { self.map.contains_key(key) } + + fn contains_val(&self, val: &VT) -> bool { self.contains_key(&self.hash_builder.hash_one(val)) } +} + +impl NaiveStore +where + VT: Hash + Clone, + HB: BuildHasher, +{ + pub fn with_hasher(hasher_builder: HB) -> Self { + Self { + map: HashMap::with_hasher(BuildNoHashHasher::::default()), + hash_builder: hasher_builder, + } + } +} + +impl NaiveStore +where + HB: BuildHasher, +{ + pub fn add_file>( + &mut self, types: Vec, filepath: P, + ) -> Result<(), Error> { + let file = File::open(filepath)?; + let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b',') + .double_quote(false) + .escape(Some(b'\\')) + .flexible(false) + .comment(Some(b'#')) + .from_reader(file); + for result in rdr.records() { + let record = result?; + for (i, x) in record.iter().enumerate() { + let t = &types[i]; + let val = t.parse_into_self(x); + self.add(val); + } + } + Ok(()) + } +} + +impl Default for NaiveStore> +where + VT: Hash + Clone, +{ + fn default() -> Self { + Self { + map: HashMap::with_hasher(BuildNoHashHasher::::default()), + hash_builder: BuildHasherDefault::::default(), + } + } +} diff --git a/kermit-kvs/tests/naivestore.rs b/kermit-kvs/tests/naivestore.rs new file mode 100644 index 0000000..f97fdb4 --- /dev/null +++ b/kermit-kvs/tests/naivestore.rs @@ -0,0 +1,69 @@ +#[cfg(test)] +mod tests { + use kermit_kvs::{anyvaltype::*, keyvalstore::*, naivestore::*}; + + #[test] + fn test_default() { + let mut store = NaiveStore::::default(); + let key1 = store.add("hello".to_string()); + let key2 = store.add("world".to_string()); + assert_eq!(store.get(&key1), Some(&"hello".to_string())); + assert_eq!(store.get(&key2), Some(&"world".to_string())); + assert_eq!(store.get(&0), None); + assert_eq!(store.get_all(vec![&key1, &key2, &0]), vec![ + Some(&"hello".to_string()), + Some(&"world".to_string()), + None + ]); + } + + #[test] + fn 
/// Loads the two CSV fixtures one after the other and checks, after each
/// load, the number of stored values and that every distinct cell value of
/// the fixture can be found in the store.
#[test]
fn read_file() {
    let mut store = NaiveStore::<AnyValType, _>::default();

    // First fixture: three string columns.
    let first_schema = vec![AnyValType::default_str(); 3];
    store
        .add_file(first_schema, "tests/test1.csv.test")
        .unwrap();
    assert_eq!(5, store.size());
    let first_expected: [AnyValType; 5] = [
        "Apple".into(),
        "Is".into(),
        "Delicious".into(),
        "Banana".into(),
        "Yellow".into(),
    ];
    for value in &first_expected {
        assert!(store.contains_val(value));
    }

    // Second fixture: two string columns followed by two i32 columns.
    let second_schema = vec![
        AnyValType::default_str(),
        AnyValType::default_str(),
        AnyValType::default_i32(),
        AnyValType::default_i32(),
    ];
    store
        .add_file(second_schema, "tests/test2.csv.test")
        .unwrap();
    assert_eq!(11, store.size());
    let second_expected: [AnyValType; 6] = [
        "house".into(),
        "locatedat".into(),
        0_i32.into(),
        5_i32.into(),
        2_i32.into(),
        "chair".into(),
    ];
    for value in &second_expected {
        assert!(store.contains_val(value));
    }
}
ded2b92..06eebb3 100644 --- a/kermit/Cargo.toml +++ b/kermit/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "kermit" description = "Relational data structures, iterators and algorithms" -version = "0.0.4-dev" +version = "0.0.5-dev" authors.workspace = true edition.workspace = true homepage.workspace = true @@ -10,6 +10,7 @@ readme.workspace = true repository.workspace = true [dependencies] +kermit-kvs = { version = "0.0.2-dev", path = "../kermit-kvs" } kermit-ds = { version = "0.0.4-dev", path = "../kermit-ds" } kermit-iters = { version = "0.0.4-dev", path = "../kermit-iters" } kermit-algos = { version = "0.0.4-dev", path = "../kermit-algos" } diff --git a/kermit/src/lib.rs b/kermit/src/lib.rs index e2a29ad..695f2cd 100644 --- a/kermit/src/lib.rs +++ b/kermit/src/lib.rs @@ -9,3 +9,7 @@ pub mod ds { pub mod iters { pub use kermit_iters::*; } + +pub mod kvs { + pub use kermit_kvs::*; +}