Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issues/16 add datavalue dictionary #17

Merged
merged 28 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3f60a11
Create kermit-dv project
aidan-bailey Aug 15, 2024
a3119f4
Rename kermit-dv to kermit-kvs
aidan-bailey Aug 15, 2024
7cedd63
Change kermit-kvs to be a lib
aidan-bailey Aug 15, 2024
09d595f
Add kermit-kvs patch
aidan-bailey Aug 15, 2024
f49610e
Add kermit-kvs dep
aidan-bailey Aug 15, 2024
5f29ac4
Add kermit_kvs export
aidan-bailey Aug 15, 2024
a381eca
Create keyvalstore.rs
aidan-bailey Aug 15, 2024
89a4be8
Refactor
aidan-bailey Aug 16, 2024
aef02b7
Create naivestore.rs
aidan-bailey Aug 16, 2024
fed9e9c
Fix missing dependency
aidan-bailey Aug 16, 2024
4374cfb
Refactor
aidan-bailey Aug 16, 2024
6e6fb3f
Add test
aidan-bailey Aug 16, 2024
aabfe7a
Add AnyValType
aidan-bailey Aug 16, 2024
8a1f261
Move anyvaltype into own crate
aidan-bailey Aug 16, 2024
d68ef3d
Refactor
aidan-bailey Aug 16, 2024
41e8f49
Refactor
aidan-bailey Aug 16, 2024
50bbf2d
Add type id to hash
aidan-bailey Aug 16, 2024
8630448
Add defaults and parsing functions to AnyValType
aidan-bailey Aug 17, 2024
35ab0ad
Add csv dep
aidan-bailey Aug 17, 2024
672fc43
Add add_file function
aidan-bailey Aug 17, 2024
87da81b
Move tests
aidan-bailey Aug 17, 2024
5297092
Add size()
aidan-bailey Aug 17, 2024
3e7fcde
Add test files
aidan-bailey Aug 17, 2024
04498e4
Add contains_key() and contains_val()
aidan-bailey Aug 17, 2024
f250732
Add more asserts to read_file test
aidan-bailey Aug 17, 2024
103034a
Format
aidan-bailey Aug 17, 2024
5bbfc4c
Fix clippy warning
aidan-bailey Aug 17, 2024
60b68c8
Fix clippy warning
aidan-bailey Aug 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["kermit", "kermit-algos", "kermit-ds", "kermit-iters"]
members = ["kermit", "kermit-algos", "kermit-ds", "kermit-kvs", "kermit-iters"]

[workspace.package]
authors = ["Aidan Bailey"]
Expand All @@ -12,6 +12,7 @@ repository = "https://github.com/aidan-bailey/kermit"

[patch.crates-io]
kermit = { path = "kermit" }
kermit-kvs = { path = "kermit-kvs" }
kermit-ds = { path = "kermit-ds" }
kermit-iters = { path = "kermit-iters" }
kermit-algos = { path = "kermit-algos" }
14 changes: 14 additions & 0 deletions kermit-kvs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "kermit-kvs"
description = "Key-value stores used in Kermit"
version = "0.0.2-dev"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true

[dependencies]
csv = "1.1"
nohash-hasher = "0.2.0"
125 changes: 125 additions & 0 deletions kermit-kvs/src/anyvaltype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
use std::{any::TypeId, hash::Hash};

/// A value that holds exactly one of a fixed set of primitive payload types.
///
/// Equality is derived: two values are equal when they are the same variant
/// and their payloads compare equal with `==`.
#[derive(Debug, Clone, PartialEq)]
pub enum AnyValType {
/// An owned string.
Str(String),
/// A 32-bit signed integer.
I32(i32),
/// A 64-bit signed integer.
I64(i64),
/// A 32-bit float.
F32(f32),
/// A 64-bit float.
F64(f64),
}

// Defaults

impl AnyValType {
pub fn default_str() -> Self { AnyValType::Str(String::new()) }

pub fn default_i32() -> Self { AnyValType::I32(0) }

pub fn default_i64() -> Self { AnyValType::I64(0) }

pub fn default_f32() -> Self { AnyValType::F32(0.0) }

pub fn default_f64() -> Self { AnyValType::F64(0.0) }
}

// Parsing

/// Constructors that build a value from its textual representation.
impl AnyValType {
    /// Parses `v` into `T`, panicking with a message that names both the
    /// offending input and the target type so that bad data is easy to trace.
    fn parse_or_panic<T>(v: &str, target: &str) -> T
    where
        T: std::str::FromStr,
        T::Err: std::fmt::Display,
    {
        match v.parse() {
            | Ok(parsed) => parsed,
            | Err(err) => panic!("cannot parse {v:?} as {target}: {err}"),
        }
    }

    /// Wraps a copy of `v` in a `Str`. Never fails.
    pub fn parse_str(v: &str) -> Self { AnyValType::Str(v.to_string()) }

    /// Parses `v` as an `i32`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `i32` literal.
    pub fn parse_i32(v: &str) -> Self { AnyValType::I32(Self::parse_or_panic(v, "i32")) }

    /// Parses `v` as an `i64`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `i64` literal.
    pub fn parse_i64(v: &str) -> Self { AnyValType::I64(Self::parse_or_panic(v, "i64")) }

    /// Parses `v` as an `f32`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `f32` literal.
    pub fn parse_f32(v: &str) -> Self { AnyValType::F32(Self::parse_or_panic(v, "f32")) }

    /// Parses `v` as an `f64`.
    ///
    /// # Panics
    ///
    /// Panics if `v` is not a valid `f64` literal.
    pub fn parse_f64(v: &str) -> Self { AnyValType::F64(Self::parse_or_panic(v, "f64")) }

    /// Parses `v` into the same variant as `self`; the payload of `self` is
    /// ignored and only serves as a type tag.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a numeric variant and `v` cannot be parsed as that
    /// numeric type.
    pub fn parse_into_self(&self, v: &str) -> Self {
        match self {
            | AnyValType::Str(_) => AnyValType::parse_str(v),
            | AnyValType::I32(_) => AnyValType::parse_i32(v),
            | AnyValType::I64(_) => AnyValType::parse_i64(v),
            | AnyValType::F32(_) => AnyValType::parse_f32(v),
            | AnyValType::F64(_) => AnyValType::parse_f64(v),
        }
    }
}

// To Methods

impl AnyValType {
pub fn to_string(&self) -> Option<&String> {
match self {
| AnyValType::Str(v) => Some(v),
| _ => None,
}
}

pub fn to_i32(&self) -> Option<i32> {
match self {
| AnyValType::I32(v) => Some(*v),
| _ => None,
}
}

pub fn to_i64(&self) -> Option<i64> {
match self {
| AnyValType::I64(v) => Some(*v),
| _ => None,
}
}

pub fn to_f32(&self) -> Option<f32> {
match self {
| AnyValType::F32(v) => Some(*v),
| _ => None,
}
}

pub fn to_f64(&self) -> Option<f64> {
match self {
| AnyValType::F64(v) => Some(*v),
| _ => None,
}
}
}

// From Methods

impl From<&str> for AnyValType {
fn from(v: &str) -> Self { AnyValType::Str(v.to_string()) }
}

impl From<String> for AnyValType {
fn from(v: String) -> Self { AnyValType::Str(v) }
}

impl From<i32> for AnyValType {
fn from(v: i32) -> Self { AnyValType::I32(v) }
}

impl From<i64> for AnyValType {
fn from(v: i64) -> Self { AnyValType::I64(v) }
}

impl From<f32> for AnyValType {
fn from(v: f32) -> Self { AnyValType::F32(v) }
}

impl From<f64> for AnyValType {
fn from(v: f64) -> Self { AnyValType::F64(v) }
}

/// Hashes the payload together with the `TypeId` of the payload's type, so
/// that payloads of different variants hash from different inputs.
///
/// Floats are hashed through their bit pattern. Positive and negative zero
/// have different bit patterns but compare equal under the derived
/// `PartialEq`, so both are hashed as positive zero; otherwise two equal
/// values would produce two different hashes.
impl Hash for AnyValType {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        match self {
            | AnyValType::Str(v) => (TypeId::of::<String>(), v).hash(state),
            | AnyValType::I32(v) => (TypeId::of::<i32>(), v).hash(state),
            | AnyValType::I64(v) => (TypeId::of::<i64>(), v).hash(state),
            | AnyValType::F32(v) => {
                // `-0.0 == 0.0`, so collapse signed zero before taking the bits.
                let bits = if *v == 0.0 { 0.0_f32.to_bits() } else { v.to_bits() };
                (TypeId::of::<f32>(), bits).hash(state)
            },
            | AnyValType::F64(v) => {
                // `-0.0 == 0.0`, so collapse signed zero before taking the bits.
                let bits = if *v == 0.0 { 0.0_f64.to_bits() } else { v.to_bits() };
                (TypeId::of::<f64>(), bits).hash(state)
            },
        }
    }
}
16 changes: 16 additions & 0 deletions kermit-kvs/src/keyvalstore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use std::hash::Hash;

/// Interface of a store that hands out a key of type `KT` for every value of
/// type `VT` added to it and allows the value to be looked up by that key.
pub trait KeyValStore<KT, VT>
where
KT: Eq + Hash + Clone + PartialOrd,
VT: Hash,
{
/// Adds `val` to the store and returns its key.
fn add(&mut self, val: VT) -> KT;
/// Adds every value in `val` and returns one key per value.
fn add_all(&mut self, val: Vec<VT>) -> Vec<KT>;
/// Returns a reference to the value stored under `key`, or `None`.
fn get(&self, key: &KT) -> Option<&VT>;
/// Looks up several keys at once, returning one `Option` per key.
fn get_all(&self, key: Vec<&KT>) -> Vec<Option<&VT>>;
/// Returns the keys held by the store.
fn keys(&self) -> Vec<KT>;
/// Returns the size of the store as a `usize`.
fn size(&self) -> usize;
/// Reports whether `key` is present in the store.
fn contains_key(&self, key: &KT) -> bool;
/// Reports whether `val` is present in the store.
fn contains_val(&self, val: &VT) -> bool;
}
3 changes: 3 additions & 0 deletions kermit-kvs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod anyvaltype;
pub mod keyvalstore;
pub mod naivestore;
103 changes: 103 additions & 0 deletions kermit-kvs/src/naivestore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
use {
crate::{anyvaltype::AnyValType, keyvalstore::KeyValStore},
csv::Error,
nohash_hasher::BuildNoHashHasher,
std::{
collections::HashMap,
fs::File,
hash::{BuildHasher, BuildHasherDefault, DefaultHasher, Hash},
path::Path,
},
};

/// A store that keys each value by a `u64` hash of the value itself.
///
/// `HB` is the hash builder used to turn a value into its key.
pub struct NaiveStore<VT, HB>
where
VT: Hash + Clone,
HB: BuildHasher,
{
// Maps a value's hash (as computed by `hash_builder`) to the value.
// NOTE(review): `BuildNoHashHasher` is presumably chosen so that the keys,
// which are already hashes, are not hashed a second time by the map.
map: HashMap<u64, VT, BuildNoHashHasher<u64>>,
// Produces the hash that serves as a value's key.
hash_builder: HB,
}

impl<VT, HB> KeyValStore<u64, VT> for NaiveStore<VT, HB>
where
    VT: Hash + Clone,
    HB: BuildHasher,
{
    /// Hashes `val` with the store's hash builder, stores it under that hash
    /// and returns the hash. A value already stored under the same hash is
    /// replaced.
    fn add(&mut self, val: VT) -> u64 {
        let key = self.hash_builder.hash_one(&val);
        self.map.insert(key, val);
        key
    }

    /// Adds the values in order and returns their keys in the same order.
    fn add_all(&mut self, val: Vec<VT>) -> Vec<u64> {
        let mut assigned = Vec::with_capacity(val.len());
        for item in val {
            assigned.push(self.add(item));
        }
        assigned
    }

    /// Returns the value stored under `key`, if any.
    fn get(&self, key: &u64) -> Option<&VT> { self.map.get(key) }

    /// Looks up each key in order; missing keys yield `None` in their slot.
    fn get_all(&self, key: Vec<&u64>) -> Vec<Option<&VT>> {
        let mut found = Vec::with_capacity(key.len());
        for wanted in key {
            found.push(self.get(wanted));
        }
        found
    }

    /// Returns a copy of every key in the underlying map, in the map's
    /// iteration order.
    fn keys(&self) -> Vec<u64> { self.map.keys().copied().collect() }

    /// Returns the number of entries in the underlying map.
    fn size(&self) -> usize { self.map.len() }

    /// Reports whether an entry exists under `key`.
    fn contains_key(&self, key: &u64) -> bool { self.map.contains_key(key) }

    /// Reports whether an entry exists under the hash of `val`. Only the hash
    /// is checked; the stored value is not compared against `val`.
    fn contains_val(&self, val: &VT) -> bool {
        let key = self.hash_builder.hash_one(val);
        self.contains_key(&key)
    }
}

impl<VT, HB> NaiveStore<VT, HB>
where
VT: Hash + Clone,
HB: BuildHasher,
{
pub fn with_hasher(hasher_builder: HB) -> Self {
Self {
map: HashMap::with_hasher(BuildNoHashHasher::<u64>::default()),
hash_builder: hasher_builder,
}
}
}

impl<HB> NaiveStore<AnyValType, HB>
where
HB: BuildHasher,
{
pub fn add_file<P: AsRef<Path>>(
&mut self, types: Vec<AnyValType>, filepath: P,
) -> Result<(), Error> {
let file = File::open(filepath)?;
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.delimiter(b',')
.double_quote(false)
.escape(Some(b'\\'))
.flexible(false)
.comment(Some(b'#'))
.from_reader(file);
for result in rdr.records() {
let record = result?;
for (i, x) in record.iter().enumerate() {
let t = &types[i];
let val = t.parse_into_self(x);
self.add(val);
}
}
Ok(())
}
}

impl<VT> Default for NaiveStore<VT, BuildHasherDefault<DefaultHasher>>
where
VT: Hash + Clone,
{
fn default() -> Self {
Self {
map: HashMap::with_hasher(BuildNoHashHasher::<u64>::default()),
hash_builder: BuildHasherDefault::<DefaultHasher>::default(),
}
}
}
69 changes: 69 additions & 0 deletions kermit-kvs/tests/naivestore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#[cfg(test)]
mod tests {
    use kermit_kvs::{anyvaltype::*, keyvalstore::*, naivestore::*};

    /// Plain strings can be added and fetched back, singly and in bulk, and
    /// an unknown key yields `None`.
    #[test]
    fn test_default() {
        let mut store = NaiveStore::<String, _>::default();
        let hello = String::from("hello");
        let world = String::from("world");

        let hello_key = store.add(hello.clone());
        let world_key = store.add(world.clone());

        assert_eq!(store.get(&hello_key), Some(&hello));
        assert_eq!(store.get(&world_key), Some(&world));
        assert_eq!(store.get(&0), None);

        let fetched = store.get_all(vec![&hello_key, &world_key, &0]);
        assert_eq!(fetched, vec![Some(&hello), Some(&world), None]);
    }

    /// `AnyValType` values of different variants can share one store.
    #[test]
    fn test_anyvaltype() {
        let mut store = NaiveStore::<AnyValType, _>::default();

        let hello_key = store.add(AnyValType::from("hello"));
        let world_key = store.add(AnyValType::from("world"));
        assert_eq!(store.get(&hello_key), Some(&AnyValType::from("hello")));
        assert_eq!(store.get(&world_key), Some(&AnyValType::from("world")));

        let half_key = store.add(AnyValType::F64(0.5));
        assert_eq!(store.get(&half_key), Some(&AnyValType::F64(0.5)));
    }

    /// Loading CSV fixtures adds each distinct field value exactly once.
    #[test]
    fn read_file() {
        let mut store = NaiveStore::<AnyValType, _>::default();

        // First fixture: three string columns.
        let string_columns = vec![AnyValType::default_str(); 3];
        store
            .add_file(string_columns, "tests/test1.csv.test")
            .unwrap();
        assert_eq!(5, store.size());
        for word in ["Apple", "Is", "Delicious", "Banana", "Yellow"] {
            assert!(store.contains_val(&word.into()));
        }

        // Second fixture: two string columns followed by two i32 columns.
        let mixed_columns = vec![
            AnyValType::default_str(),
            AnyValType::default_str(),
            AnyValType::default_i32(),
            AnyValType::default_i32(),
        ];
        store
            .add_file(mixed_columns, "tests/test2.csv.test")
            .unwrap();
        assert_eq!(11, store.size());
        for word in ["house", "locatedat", "chair"] {
            assert!(store.contains_val(&word.into()));
        }
        for number in [0_i32, 5_i32, 2_i32] {
            assert!(store.contains_val(&number.into()));
        }
    }
}
2 changes: 2 additions & 0 deletions kermit-kvs/tests/test1.csv.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Apple,Is,Delicious
Banana,Is,Yellow
2 changes: 2 additions & 0 deletions kermit-kvs/tests/test2.csv.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
house,locatedat,0,5
chair,locatedat,2,5
3 changes: 2 additions & 1 deletion kermit/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "kermit"
description = "Relational data structures, iterators and algorithms"
version = "0.0.4-dev"
version = "0.0.5-dev"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
Expand All @@ -10,6 +10,7 @@ readme.workspace = true
repository.workspace = true

[dependencies]
kermit-kvs = { version = "0.0.2-dev", path = "../kermit-kvs" }
kermit-ds = { version = "0.0.4-dev", path = "../kermit-ds" }
kermit-iters = { version = "0.0.4-dev", path = "../kermit-iters" }
kermit-algos = { version = "0.0.4-dev", path = "../kermit-algos" }
4 changes: 4 additions & 0 deletions kermit/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ pub mod ds {
/// Re-exports every public item of the `kermit_iters` crate.
pub mod iters {
pub use kermit_iters::*;
}

/// Re-exports every public item of the `kermit_kvs` crate.
pub mod kvs {
pub use kermit_kvs::*;
}