Skip to content

Commit

Permalink
feat(cognitarium): rework triples primary key storage
Browse files Browse the repository at this point in the history
Use (object, predicate, subject) tuple as primary key using a blake3
hash for the object that could enventually be large. This allows to only
keep one additional index (subject, predicate) to efficiently query triples.
  • Loading branch information
amimart committed Jun 1, 2023
1 parent 2011789 commit 5409300
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 145 deletions.
39 changes: 39 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions contracts/okp4-cognitarium/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ rio_api = "0.8.3"
rio_xml = "0.8.3"
rio_turtle = "0.8.3"
oxiri = "0.2.2"
blake3 = "1.3.3"

[dev-dependencies]
base64 = "0.21.2"
Expand Down
27 changes: 12 additions & 15 deletions contracts/okp4-cognitarium/src/contract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use cw2::set_contract_version;

use crate::error::ContractError;
use crate::msg::{ExecuteMsg, InstantiateMsg, QueryMsg};
use crate::state::{Store, STORE, TRIPLE_KEY_INCREMENT};
use crate::state::{Store, STORE};

// version info for migration info
const CONTRACT_NAME: &str = concat!("crates.io:", env!("CARGO_PKG_NAME"));
Expand All @@ -24,7 +24,6 @@ pub fn instantiate(
set_contract_version(deps.storage, CONTRACT_NAME, CONTRACT_VERSION)?;

STORE.save(deps.storage, &Store::new(info.sender, msg.limits.into()))?;
TRIPLE_KEY_INCREMENT.save(deps.storage, &Uint128::zero())?;

Ok(Response::default())
}
Expand All @@ -47,18 +46,17 @@ pub mod execute {
use crate::msg::DataInput;
use crate::rdf;
use crate::state::{triples, Triple};
use blake3::Hash;

pub fn insert(deps: DepsMut, graph: DataInput) -> Result<Response, ContractError> {
let mut store = STORE.load(deps.storage)?;

let mut pk = TRIPLE_KEY_INCREMENT.load(deps.storage)?;
let old_count = store.stat.triples_count;
rdf::parse_triples(
graph,
|triple| -> Result<Triple, ContractError> { Ok(triple.try_into()?) },
|res| -> Result<(), ContractError> {
res.and_then(|triple| {
pk += Uint128::one();
store.stat.triples_count += Uint128::one();

store
Expand All @@ -72,14 +70,22 @@ pub mod execute {
})
.unwrap_or(Ok(()))?;

let object_hash: Hash = triple.object.as_hash();
triples()
.save(deps.storage, pk.u128(), &triple)
.save(
deps.storage,
(
object_hash.as_bytes(),
triple.predicate.clone(),
triple.subject.clone(),
),
&triple,
)
.map_err(ContractError::Std)
})
},
)?;

TRIPLE_KEY_INCREMENT.save(deps.storage, &pk)?;
STORE.save(deps.storage, &store)?;

Ok(Response::new()
Expand Down Expand Up @@ -147,11 +153,6 @@ mod tests {
triples_count: Uint128::zero(),
}
);

assert_eq!(
TRIPLE_KEY_INCREMENT.load(&deps.storage),
Ok(Uint128::zero())
);
}

#[test]
Expand Down Expand Up @@ -201,10 +202,6 @@ mod tests {
STORE.load(&deps.storage).unwrap().stat.triples_count,
Uint128::from(40u128),
);
assert_eq!(
TRIPLE_KEY_INCREMENT.load(&deps.storage).unwrap(),
Uint128::from(40u128),
);
}
}

Expand Down
105 changes: 1 addition & 104 deletions contracts/okp4-cognitarium/src/state/de.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::state::triples::{Literal, Node, Object, Subject};
use crate::state::triples::{Node, Subject};
use cosmwasm_std::{StdError, StdResult};
use cw_storage_plus::{Key, KeyDeserialize, Prefixer, PrimaryKey};

Expand Down Expand Up @@ -43,55 +43,6 @@ impl KeyDeserialize for Subject {
}
}

impl<'a> PrimaryKey<'a> for Object {
type Prefix = ();
type SubPrefix = ();
type Suffix = Self;
type SuperSuffix = Self;

fn key(&self) -> Vec<Key> {
match self {
Object::Named(node) => {
let mut key: Vec<Key> = Vec::with_capacity(3);
key.push(Key::Val8([b'n']));
for k in node.key() {
key.push(k);
}
key
}
Object::Blank(node) => {
let mut key: Vec<Key> = Vec::with_capacity(2);
key.push(Key::Val8([b'b']));
key.push(Key::Ref(node.as_bytes()));
key
}
Object::Literal(literal) => {
let encoded = literal.key();
let mut key: Vec<Key> = Vec::with_capacity(encoded.len() + 1);
key.push(Key::Val8([b'l']));
for k in encoded {
key.push(k);
}
key
}
}
}
}

impl KeyDeserialize for Object {
type Output = Object;

fn from_vec(mut value: Vec<u8>) -> StdResult<Self::Output> {
let bytes = value.split_off(3);
match bytes[2] {
b'n' => Node::from_vec(value).map(|n| Object::Named(n)),
b'b' => Ok(Object::Blank(String::from_vec(value)?)),
b'l' => Literal::from_vec(value).map(|l| Object::Literal(l)),
_ => Err(StdError::generic_err("Could not deserialize Object")),
}
}
}

impl<'a> PrimaryKey<'a> for Node {
type Prefix = ();
type SubPrefix = ();
Expand Down Expand Up @@ -126,57 +77,3 @@ impl KeyDeserialize for Node {
})
}
}

impl Literal {
fn key(&self) -> Vec<Key> {
match self {
Literal::Simple { value } => {
vec![Key::Ref(value.as_bytes()), Key::Ref(&[]), Key::Ref(&[])]
}
Literal::I18NString { value, language } => {
vec![
Key::Ref(value.as_bytes()),
Key::Ref(language.as_bytes()),
Key::Ref(&[]),
]
}
Literal::Typed { value, datatype } => {
let mut key: Vec<Key> = Vec::with_capacity(3);
key.push(Key::Ref(value.as_bytes()));
for k in datatype.key() {
key.push(k);
}
key
}
}
}

fn from_vec(mut value: Vec<u8>) -> StdResult<Self> {
let mut part1 = value.split_off(2);
let p1_len = parse_length(&value)?;
let mut part2_len = part1.split_off(p1_len);

let mut part2 = part2_len.split_off(2);
let p2_len = parse_length(&part2_len)?;
let part3 = part2.split_off(p2_len);

if part3.is_empty() {
if part2.is_empty() {
return Ok(Literal::Simple {
value: String::from_vec(part1)?,
});
}
return Ok(Literal::I18NString {
value: String::from_vec(part1)?,
language: String::from_vec(part2)?,
});
}
Ok(Literal::Typed {
value: String::from_vec(part1)?,
datatype: Node {
value: String::from_vec(part2)?,
namespace: String::from_vec(part3)?,
},
})
}
}
70 changes: 44 additions & 26 deletions contracts/okp4-cognitarium/src/state/triples.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,22 @@
use crate::rdf::explode_iri;
use cosmwasm_std::{StdError, Uint128};
use cw_storage_plus::{Index, IndexList, IndexedMap, Item, MultiIndex};
use blake3::Hash;
use cosmwasm_std::StdError;
use cw_storage_plus::{Index, IndexList, IndexedMap, MultiIndex};
use rio_api::model::NamedNode;
use serde::{Deserialize, Serialize};

/// Triple primary key as [Uint128] auto-increment.
///
/// Note: Considering the maximum value of [Uint128] there is no need to manage any re-usability of
/// keys in case of triple removal.
pub const TRIPLE_KEY_INCREMENT: Item<Uint128> = Item::new("triple-key");

pub struct TripleIndexes<'a> {
subject_and_predicate: MultiIndex<'a, (Subject, Predicate), Triple, Uint128>,
predicate_and_object: MultiIndex<'a, (Predicate, Object), Triple, Uint128>,
subject_and_predicate:
MultiIndex<'a, (Subject, Predicate), Triple, (&'a [u8], Predicate, Subject)>,
}

impl IndexList<Triple> for TripleIndexes<'_> {
fn get_indexes(&self) -> Box<dyn Iterator<Item = &'_ dyn Index<Triple>> + '_> {
Box::new(
vec![
&self.subject_and_predicate as &dyn Index<Triple>,
&self.predicate_and_object,
]
.into_iter(),
)
Box::new(vec![&self.subject_and_predicate as &dyn Index<Triple>].into_iter())
}
}

pub fn triples<'a>() -> IndexedMap<'a, u128, Triple, TripleIndexes<'a>> {
pub fn triples<'a>() -> IndexedMap<'a, (&'a [u8], Predicate, Subject), Triple, TripleIndexes<'a>> {
IndexedMap::new(
"TRIPLE",
TripleIndexes {
Expand All @@ -36,20 +25,15 @@ pub fn triples<'a>() -> IndexedMap<'a, u128, Triple, TripleIndexes<'a>> {
"TRIPLE",
"TRIPLE__SUBJECT_PREDICATE",
),
predicate_and_object: MultiIndex::new(
|_pk, triple| (triple.predicate.clone(), triple.object.clone()),
"TRIPLE",
"TRIPLE__PREDICATE_OBJECT",
),
},
)
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
pub struct Triple {
subject: Subject,
predicate: Predicate,
object: Object,
pub subject: Subject,
pub predicate: Predicate,
pub object: Object,
}

impl<'a> TryFrom<rio_api::model::Triple<'a>> for Triple {
Expand Down Expand Up @@ -108,6 +92,40 @@ impl<'a> TryFrom<rio_api::model::Term<'a>> for Object {
}
}

impl Object {
pub fn as_hash(&self) -> Hash {
let mut hasher = blake3::Hasher::new();
match self {
Object::Named(n) => {
hasher
.update(&[b'n'])
.update(n.namespace.as_bytes())
.update(n.namespace.as_bytes());
}
Object::Blank(n) => {
hasher.update(&[b'b']).update(n.as_bytes());
}
Object::Literal(l) => {
hasher.update(&[b'l']);
match l {
Literal::Simple { value } => hasher.update(&[b's']).update(value.as_bytes()),
Literal::I18NString { value, language } => hasher
.update(&[b'i'])
.update(value.as_bytes())
.update(language.as_bytes()),
Literal::Typed { value, datatype } => hasher
.update(&[b't'])
.update(value.as_bytes())
.update(datatype.namespace.as_bytes())
.update(datatype.value.as_bytes()),
};
}
}

hasher.finalize()
}
}

pub type BlankNode = String;

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
Expand Down

0 comments on commit 5409300

Please sign in to comment.