Skip to content
This repository has been archived by the owner on Dec 15, 2018. It is now read-only.

Commit

Permalink
initial test passing
Browse files Browse the repository at this point in the history
init cli
lazy init data
intersection iter
move scaffold code back into sbt
make Leaf generic
  • Loading branch information
luizirber committed Nov 28, 2018
1 parent f9314fd commit d2aa7a5
Show file tree
Hide file tree
Showing 8 changed files with 482 additions and 108 deletions.
10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ bench = false
[profile.release]
lto=true

[[bin]]
bench = false
path = "src/main.rs"
name = "smrs"

[features]
from-finch = ["finch", "needletail"]

Expand All @@ -25,13 +30,18 @@ from-finch = ["finch", "needletail"]
[dependencies]
backtrace = "0.3.4"
byteorder = "^1.2"
clap = { version = "~2.32", features = ["yaml"] }
derive_builder = "^0.7"
env_logger = "0.6.0"
exitfailure = "0.5.1"
failure = "0.1.3"
failure_derive = "0.1.3"
finch = { version = "~0.1.6", optional = true }
fixedbitset = "^0.1.9"
human-panic = "1.0.1"
lazy_static = "1.0.0"
lazy-init = "0.3.0"
log = "0.4.0"
md5 = "0.6.0"
murmurhash3 = "~0.0.5"
needletail = { version = "~0.2.1", optional = true }
Expand Down
27 changes: 17 additions & 10 deletions benches/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,37 @@ use sourmash::index::linear::LinearIndexBuilder;
use sourmash::index::sbt::{Node, SBT};
use sourmash::index::search::search_minhashes;
use sourmash::index::{Index, Leaf};
use sourmash::Signature;

fn find_bench(c: &mut Criterion) {
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("tests/data/v5.sbt.json");

let sbt: SBT<Node, Leaf> = SBT::from_path(filename).expect("Loading error");
let sbt: SBT<Node, Leaf<Signature>> = SBT::from_path(filename).expect("Loading error");

let leaf: Leaf = (*sbt.leaves().first().unwrap()).clone();
let leaf: Leaf<Signature> = (*sbt.leaves().first().unwrap()).clone();

let mut linear = LinearIndexBuilder::default()
.storage(sbt.storage())
.build()
.unwrap();
for l in &sbt.leaves() {
linear.insert(*l);
linear.insert(l);
}

let sbt_find = Fun::new("sbt_find", move |b: &mut Bencher, leaf: &Leaf| {
b.iter(|| sbt.find(search_minhashes, leaf, 0.1))
});

let linear_find = Fun::new("linear_find", move |b: &mut Bencher, leaf: &Leaf| {
b.iter(|| linear.find(search_minhashes, leaf, 0.1))
});
let sbt_find = Fun::new(
"sbt_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| sbt.find(search_minhashes, leaf, 0.1))
},
);

let linear_find = Fun::new(
"linear_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| linear.find(search_minhashes, leaf, 0.1))
},
);

let functions = vec![sbt_find, linear_find];
c.bench_functions("find", functions, leaf);
Expand Down
68 changes: 52 additions & 16 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::path::Path;
use std::rc::Rc;

use failure::Error;
use lazy_init::Lazy;

use index::storage::{ReadData, Storage};
use Signature;
Expand Down Expand Up @@ -67,16 +68,25 @@ pub struct LeafInfo {
}

/// A leaf entry of an index, generic over the type `T` of the data it holds.
///
/// A builder is derived for this struct; the `storage` and `data` fields are
/// excluded from the generated setters (`setter(skip)`).
#[derive(Builder, Default, Clone)]
pub struct Leaf {
pub struct Leaf<T>
where
T: std::marker::Sync,
{
/// Key handed to `Storage::load` when the leaf's data is read.
pub(crate) filename: String,
/// Name of this leaf.
pub(crate) name: String,
/// Metadata string attached to this leaf.
pub(crate) metadata: String,

/// Optional shared handle to the storage backend. The `Leaf<Signature>`
/// helpers fall back to a default result when this is `None`.
#[builder(setter(skip))]
pub(crate) storage: Option<Rc<Storage>>,

/// Lazily initialized payload, shared between clones through the `Rc`;
/// for `Leaf<Signature>` it is filled on the first `data()` call.
#[builder(setter(skip))]
pub(crate) data: Rc<Lazy<T>>,
}

impl std::fmt::Debug for Leaf {
impl<T> std::fmt::Debug for Leaf<T>
where
T: std::marker::Sync,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
Expand All @@ -86,21 +96,47 @@ impl std::fmt::Debug for Leaf {
}
}

impl<S: Storage + ?Sized> ReadData<Signature, S> for Leaf {
fn data(&self, storage: &S) -> Result<Signature, Error> {
// TODO: cache this call!
let raw = storage.load(&self.filename)?;
let sigs: Vec<Signature> = serde_json::from_reader(&mut &raw[..])?;
// TODO: select the right sig?
Ok(sigs[0].clone())
impl<S: Storage + ?Sized> ReadData<Signature, S> for Leaf<Signature> {
fn data(&self, storage: &S) -> Result<&Signature, Error> {
let sig = self.data.get_or_create(|| {
let raw = storage.load(&self.filename).unwrap();
let sigs: Vec<Signature> = serde_json::from_reader(&mut &raw[..]).unwrap();
// TODO: select the right sig?
sigs[0].to_owned()
});

Ok(sig)
}
}

impl Leaf<Signature> {
    /// Counts the hashes shared between the first sketch of this leaf and the
    /// first sketch of `other`.
    ///
    /// Both signatures are read through this leaf's storage handle. Returns
    /// `0` when this leaf has no storage attached.
    ///
    /// # Panics
    ///
    /// Panics if either signature cannot be loaded or if the underlying
    /// `count_common` call reports an error.
    pub fn count_common(&self, other: &Leaf<Signature>) -> u64 {
        let handle = match self.storage {
            Some(ref handle) => handle,
            None => return 0,
        };

        let mine: &Signature = self.data(&**handle).unwrap();
        let theirs: &Signature = other.data(&**handle).unwrap();

        // TODO: select the right signatures...
        let shared = mine.signatures[0]
            .count_common(&theirs.signatures[0])
            .unwrap();
        shared as u64
    }

    /// Returns a copy of the `mins` of this leaf's first sketch, or an empty
    /// vector when this leaf has no storage attached.
    ///
    /// # Panics
    ///
    /// Panics if the signature cannot be loaded.
    pub fn mins(&self) -> Vec<u64> {
        match self.storage {
            None => Vec::new(),
            Some(ref handle) => {
                let sig: &Signature = self.data(&**handle).unwrap();
                sig.signatures[0].mins.iter().cloned().collect()
            }
        }
    }
}

impl Comparable<Leaf> for Leaf {
fn similarity(&self, other: &Leaf) -> f64 {
impl Comparable<Leaf<Signature>> for Leaf<Signature> {
fn similarity(&self, other: &Leaf<Signature>) -> f64 {
if let Some(storage) = &self.storage {
let ng: Signature = self.data(&**storage).unwrap();
let ong: Signature = other.data(&**storage).unwrap();
let ng: &Signature = self.data(&**storage).unwrap();
let ong: &Signature = other.data(&**storage).unwrap();

// TODO: select the right signatures...
ng.signatures[0].compare(&ong.signatures[0]).unwrap()
Expand All @@ -111,10 +147,10 @@ impl Comparable<Leaf> for Leaf {
}
}

fn containment(&self, other: &Leaf) -> f64 {
fn containment(&self, other: &Leaf<Signature>) -> f64 {
if let Some(storage) = &self.storage {
let mut ng: Signature = self.data(&**storage).unwrap();
let ong: Signature = other.data(&**storage).unwrap();
let ng: &Signature = self.data(&**storage).unwrap();
let ong: &Signature = other.data(&**storage).unwrap();

// TODO: select the right signatures...
let common = ng.signatures[0].count_common(&ong.signatures[0]).unwrap();
Expand Down
Loading

0 comments on commit d2aa7a5

Please sign in to comment.