Skip to content

Commit

Permalink
[ENH] Metadata indices (#1724)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - New functionality
- Blockfile-based metadata indexing. Currently uses a `HashMapBlockfile`,
which is easy to swap out.

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python, `yarn test` for js

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need
to make documentation changes in the [docs
repository](https://github.com/chroma-core/docs)?*

---------

Co-authored-by: hammadb <[email protected]>
  • Loading branch information
beggers and HammadB committed Feb 22, 2024
1 parent 5909146 commit 2e93302
Show file tree
Hide file tree
Showing 5 changed files with 340 additions and 20 deletions.
2 changes: 2 additions & 0 deletions rust/worker/src/blockstore/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
mod positional_posting_list_value;
mod types;

pub use types::*;
73 changes: 53 additions & 20 deletions rust/worker/src/blockstore/types.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
use super::positional_posting_list_value::PositionalPostingList;
use crate::errors::ChromaError;
use crate::errors::{ChromaError, ErrorCodes};
use thiserror::Error;
use arrow::array::Int32Array;
use roaring::RoaringBitmap;
use std::fmt::Display;
use std::fmt::{Debug, Display};
use std::hash::{Hash, Hasher};

/// Errors that a blockfile operation can report.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub(crate) enum BlockfileError {
/// The requested key has no entry in the blockfile.
#[error("Key not found")]
NotFoundError,
}

impl ChromaError for BlockfileError {
    /// Maps each blockfile error onto the error code reported to callers.
    ///
    /// The match is intentionally exhaustive (no `_` arm) so that adding a
    /// variant to `BlockfileError` forces a decision about its code here.
    fn code(&self) -> ErrorCodes {
        match *self {
            // NOTE(review): a missing key is surfaced as `InvalidArgument`;
            // confirm this is the intended code rather than a dedicated
            // "not found" code, if `ErrorCodes` offers one.
            Self::NotFoundError => ErrorCodes::InvalidArgument,
        }
    }
}

// ===== Key Types =====
#[derive(Clone)]
Expand All @@ -16,19 +31,22 @@ pub(crate) struct BlockfileKey {
/// The typed value half of a blockfile key; one variant per supported key type.
///
/// The ordering code for `BlockfileKey` panics when asked to compare two
/// different variants, so keys that are compared with each other are expected
/// to use the same variant.
pub(crate) enum Key {
/// A string key.
String(String),
/// A 32-bit floating point key.
Float(f32),
/// A boolean key.
Bool(bool),
}

/// Names the kinds of key without carrying a value; the variants mirror
/// those of `Key` one-to-one.
#[derive(Debug, Clone)]
pub(crate) enum KeyType {
/// Corresponds to `Key::String`.
String,
/// Corresponds to `Key::Float`.
Float,
/// Corresponds to `Key::Bool`.
Bool,
}

impl Display for Key {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Key::String(s) => write!(f, "{}", s),
Key::Float(fl) => write!(f, "{}", fl),
Key::Bool(b) => write!(f, "{}", b),
}
}
}
Expand All @@ -39,6 +57,16 @@ impl BlockfileKey {
}
}

impl Debug for BlockfileKey {
    /// Renders the key as `BlockfileKey(prefix: <prefix>, key: <key>)`,
    /// using the `Display` form of both fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let prefix = &self.prefix;
        let key = &self.key;
        f.write_fmt(format_args!(
            "BlockfileKey(prefix: {}, key: {})",
            prefix, key
        ))
    }
}

impl Hash for BlockfileKey {
// Hash is only used for the HashMap implementation, which is a test/reference implementation
// Therefore this hash implementation is not used in production and allowed to be
Expand Down Expand Up @@ -72,11 +100,15 @@ impl Ord for BlockfileKey {
match self.key {
Key::String(ref s1) => match &other.key {
Key::String(s2) => s1.cmp(s2),
_ => panic!("Cannot compare string to float"),
_ => panic!("Cannot compare string to float or bool"),
},
Key::Float(f1) => match &other.key {
Key::Float(f2) => f1.partial_cmp(f2).unwrap(),
_ => panic!("Cannot compare float to string"),
_ => panic!("Cannot compare float to string or bool"),
},
Key::Bool(b1) => match &other.key {
Key::Bool(b2) => b1.cmp(b2),
_ => panic!("Cannot compare bool to string or float"),
},
}
} else {
Expand Down Expand Up @@ -155,7 +187,7 @@ pub(crate) trait Blockfile {
) -> Result<Vec<(BlockfileKey, Value)>, Box<dyn ChromaError>>;
}

struct HashMapBlockfile {
pub(crate) struct HashMapBlockfile {
map: std::collections::HashMap<BlockfileKey, Value>,
}

Expand All @@ -181,10 +213,7 @@ impl Blockfile for HashMapBlockfile {
fn get(&self, key: BlockfileKey) -> Result<Value, Box<dyn ChromaError>> {
match self.map.get(&key) {
Some(value) => Ok(value.clone()),
None => {
// TOOD: make error
panic!("Key not found");
}
None => Err(Box::new(BlockfileError::NotFoundError)),
}
}

Expand Down Expand Up @@ -276,17 +305,6 @@ mod tests {
use super::*;
use crate::blockstore::positional_posting_list_value::PositionalPostingListBuilder;
use arrow::array::Array;
use std::fmt::Debug;

impl Debug for BlockfileKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"BlockfileKey(prefix: {}, key: {})",
self.prefix, self.key
)
}
}

#[test]
fn test_blockfile_set_get() {
Expand Down Expand Up @@ -350,6 +368,21 @@ mod tests {
}
}

#[test]
fn test_bool_key() {
    // Round-trips a value through the blockfile under a boolean key and
    // checks that the same Int32 array comes back.
    let bool_key = BlockfileKey {
        prefix: "text_prefix".to_string(),
        key: Key::Bool(true),
    };
    let stored = Value::Int32ArrayValue(Int32Array::from(vec![1]));

    let mut blockfile = HashMapBlockfile::open("test").unwrap();
    // The outcome of `set` is deliberately not inspected here; a failed write
    // shows up as a failure of the `get` below.
    let _set_outcome = blockfile.set(bool_key.clone(), stored);

    let fetched = blockfile.get(bool_key).unwrap();
    if let Value::Int32ArrayValue(arr) = fetched {
        assert_eq!(arr, Int32Array::from(vec![1]));
    } else {
        panic!("Value is not an arrow int32 array");
    }
}

#[test]
fn test_storing_arrow_in_blockfile() {
let mut blockfile = HashMapBlockfile::open("test").unwrap();
Expand Down
3 changes: 3 additions & 0 deletions rust/worker/src/index/metadata/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
mod types;

// TODO reexport the types module
Loading

0 comments on commit 2e93302

Please sign in to comment.