Skip to content
This repository has been archived by the owner on Dec 15, 2018. It is now read-only.

SBT scaffold #21

Merged
merged 2 commits into from
Nov 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ bench = false
[profile.release]
lto=true

[[bin]]
bench = false
path = "src/main.rs"
name = "smrs"

[features]
from-finch = ["finch", "needletail"]

Expand All @@ -25,13 +30,18 @@ from-finch = ["finch", "needletail"]
[dependencies]
backtrace = "0.3.4"
byteorder = "^1.2"
clap = { version = "~2.32", features = ["yaml"] }
derive_builder = "^0.7"
env_logger = "0.6.0"
exitfailure = "0.5.1"
failure = "0.1.3"
failure_derive = "0.1.3"
finch = { version = "~0.1.6", optional = true }
fixedbitset = "^0.1.9"
human-panic = "1.0.1"
lazy_static = "1.0.0"
lazy-init = "0.3.0"
log = "0.4.0"
md5 = "0.6.0"
murmurhash3 = "~0.0.5"
needletail = { version = "~0.2.1", optional = true }
Expand Down
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
all: test

check: build test bench

build:
cargo build

bench:
cargo bench

test:
cargo test

target/sourmash.h: src/lib.rs src/ffi.rs src/errors.rs
include/sourmash.h: src/lib.rs src/ffi.rs src/errors.rs
RUST_BACKTRACE=1 cbindgen --clean -c cbindgen.toml -o $@

.phony: test
68 changes: 56 additions & 12 deletions benches/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,81 @@ use std::path::PathBuf;

use criterion::{Bencher, Criterion, Fun};
use sourmash::index::linear::LinearIndexBuilder;
use sourmash::index::nodegraph::Nodegraph;
use sourmash::index::sbt::{Node, SBT};
use sourmash::index::search::search_minhashes;
use sourmash::index::{Index, Leaf};
use sourmash::Signature;

fn find_bench(c: &mut Criterion) {
fn find_small_bench(c: &mut Criterion) {
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("tests/data/v5.sbt.json");

let sbt: SBT<Node, Leaf> = SBT::from_path(filename).expect("Loading error");
let sbt: SBT<Node<Nodegraph>, Leaf<Signature>> =
SBT::from_path(filename).expect("Loading error");

let leaf: Leaf = (*sbt.leaves().first().unwrap()).clone();
let leaf: Leaf<Signature> = (*sbt.leaves().first().unwrap()).clone();

let mut linear = LinearIndexBuilder::default()
.storage(sbt.storage())
.build()
.unwrap();
for l in &sbt.leaves() {
linear.insert(*l);
linear.insert(l);
}

let sbt_find = Fun::new("sbt_find", move |b: &mut Bencher, leaf: &Leaf| {
b.iter(|| sbt.find(search_minhashes, leaf, 0.1))
});
let sbt_find = Fun::new(
"sbt_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| sbt.find(search_minhashes, leaf, 0.1))
},
);

let linear_find = Fun::new("linear_find", move |b: &mut Bencher, leaf: &Leaf| {
b.iter(|| linear.find(search_minhashes, leaf, 0.1))
});
let linear_find = Fun::new(
"linear_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| linear.find(search_minhashes, leaf, 0.1))
},
);

let functions = vec![sbt_find, linear_find];
c.bench_functions("find", functions, leaf);
c.bench_functions("find_small", functions, leaf);
}

criterion_group!(benches, find_bench);
fn find_subset_bench(c: &mut Criterion) {
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("tests/data/subset.sbt.json");

let sbt: SBT<Node<Nodegraph>, Leaf<Signature>> =
SBT::from_path(filename).expect("Loading error");

let leaf: Leaf<Signature> = (*sbt.leaves().first().unwrap()).clone();

let mut linear = LinearIndexBuilder::default()
.storage(sbt.storage())
.build()
.unwrap();
for l in &sbt.leaves() {
linear.insert(l);
}

let sbt_find = Fun::new(
"sbt_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| sbt.find(search_minhashes, leaf, 0.1))
},
);

let linear_find = Fun::new(
"linear_find",
move |b: &mut Bencher, leaf: &Leaf<Signature>| {
b.iter(|| linear.find(search_minhashes, leaf, 0.1))
},
);

let functions = vec![sbt_find, linear_find];
c.bench_functions("find_subset", functions, leaf);
}

criterion_group!(benches, find_small_bench, find_subset_bench);
criterion_main!(benches);
File renamed without changes.
68 changes: 52 additions & 16 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::path::Path;
use std::rc::Rc;

use failure::Error;
use lazy_init::Lazy;

use index::storage::{ReadData, Storage};
use Signature;
Expand Down Expand Up @@ -67,16 +68,25 @@ pub struct LeafInfo {
}

#[derive(Builder, Default, Clone)]
pub struct Leaf {
pub struct Leaf<T>
where
T: std::marker::Sync,
{
pub(crate) filename: String,
pub(crate) name: String,
pub(crate) metadata: String,

#[builder(setter(skip))]
pub(crate) storage: Option<Rc<Storage>>,

#[builder(setter(skip))]
pub(crate) data: Rc<Lazy<T>>,
}

impl std::fmt::Debug for Leaf {
impl<T> std::fmt::Debug for Leaf<T>
where
T: std::marker::Sync,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
Expand All @@ -86,21 +96,47 @@ impl std::fmt::Debug for Leaf {
}
}

impl<S: Storage + ?Sized> ReadData<Signature, S> for Leaf {
fn data(&self, storage: &S) -> Result<Signature, Error> {
// TODO: cache this call!
let raw = storage.load(&self.filename)?;
let sigs: Vec<Signature> = serde_json::from_reader(&mut &raw[..])?;
// TODO: select the right sig?
Ok(sigs[0].clone())
impl<S: Storage + ?Sized> ReadData<Signature, S> for Leaf<Signature> {
fn data(&self, storage: &S) -> Result<&Signature, Error> {
let sig = self.data.get_or_create(|| {
let raw = storage.load(&self.filename).unwrap();
let sigs: Vec<Signature> = serde_json::from_reader(&mut &raw[..]).unwrap();
// TODO: select the right sig?
sigs[0].to_owned()
});

Ok(sig)
}
}

impl Leaf<Signature> {
    /// Returns the number of hashes this leaf has in common with `other`.
    ///
    /// Both signatures are read through *this* leaf's storage, and only the
    /// first entry of each signature's `signatures` list is compared.
    /// Returns `0` when this leaf has no storage attached.
    ///
    /// # Panics
    ///
    /// Panics if either signature cannot be obtained, if a `signatures` list
    /// is empty, or if the inner `count_common` result cannot be unwrapped.
    pub fn count_common(&self, other: &Leaf<Signature>) -> u64 {
        // Without a storage there is nothing to load, so report no overlap.
        let storage = match &self.storage {
            Some(storage) => storage,
            None => return 0,
        };

        let own: &Signature = self.data(&**storage).unwrap();
        let theirs: &Signature = other.data(&**storage).unwrap();

        // TODO: select the right signatures...
        own.signatures[0].count_common(&theirs.signatures[0]).unwrap() as u64
    }

    /// Returns a copy of the `mins` held by the first entry of this leaf's
    /// signature, or an empty vector when the leaf has no storage attached.
    ///
    /// # Panics
    ///
    /// Panics if the signature cannot be obtained or its `signatures` list
    /// is empty.
    pub fn mins(&self) -> Vec<u64> {
        match &self.storage {
            Some(storage) => {
                let sig: &Signature = self.data(&**storage).unwrap();
                sig.signatures[0].mins.iter().cloned().collect()
            }
            None => Vec::new(),
        }
    }
}

impl Comparable<Leaf> for Leaf {
fn similarity(&self, other: &Leaf) -> f64 {
impl Comparable<Leaf<Signature>> for Leaf<Signature> {
fn similarity(&self, other: &Leaf<Signature>) -> f64 {
if let Some(storage) = &self.storage {
let ng: Signature = self.data(&**storage).unwrap();
let ong: Signature = other.data(&**storage).unwrap();
let ng: &Signature = self.data(&**storage).unwrap();
let ong: &Signature = other.data(&**storage).unwrap();

// TODO: select the right signatures...
ng.signatures[0].compare(&ong.signatures[0]).unwrap()
Expand All @@ -111,10 +147,10 @@ impl Comparable<Leaf> for Leaf {
}
}

fn containment(&self, other: &Leaf) -> f64 {
fn containment(&self, other: &Leaf<Signature>) -> f64 {
if let Some(storage) = &self.storage {
let mut ng: Signature = self.data(&**storage).unwrap();
let ong: Signature = other.data(&**storage).unwrap();
let ng: &Signature = self.data(&**storage).unwrap();
let ong: &Signature = other.data(&**storage).unwrap();

// TODO: select the right signatures...
let common = ng.signatures[0].count_common(&ong.signatures[0]).unwrap();
Expand Down
4 changes: 2 additions & 2 deletions src/index/nodegraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use fixedbitset::FixedBitSet;

type HashIntoType = u64;

#[derive(Debug)]
pub(crate) struct Nodegraph {
#[derive(Debug, Default, Clone)]
pub struct Nodegraph {
bs: Vec<FixedBitSet>,
ksize: usize,
occupied_bins: usize,
Expand Down
Loading