Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major refactoring and performance improvements. #91

Merged
merged 1 commit into from
Jun 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ sudo: false
script:
- cargo build --verbose
- cargo test --verbose
- ./run-shootout-test
- |
[ $TRAVIS_RUST_VERSION != nightly ] || (
cargo test --verbose --features pattern &&
Expand Down
12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,15 @@ An implementation of regular expressions for Rust.

[[test]]
path = "regex_macros/tests/test_dynamic.rs"
name = "all"
name = "dynamic"

[[test]]
path = "regex_macros/tests/test_dynamic_nfa.rs"
name = "dynamic_nfa"

[[test]]
path = "regex_macros/tests/test_dynamic_backtrack.rs"
name = "dynamic_backtrack"

[[bench]]
name = "all"
Expand All @@ -22,6 +30,8 @@ test = false
bench = true

[dependencies]
aho-corasick = "0.1"
memchr = "0.1"
regex-syntax = { path = "regex-syntax", version = "0.1" }

[dev-dependencies]
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ A Rust library for parsing, compiling, and executing regular expressions.

[Documentation](http://doc.rust-lang.org/regex)


## Usage

Add this to your `Cargo.toml`:
Expand All @@ -23,6 +24,7 @@ and this to your crate root:
extern crate regex;
```


# License

`regex` is primarily distributed under the terms of both the MIT license and
Expand Down
File renamed without changes.
13 changes: 13 additions & 0 deletions examples/regexdna-output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
agggtaaa|tttaccct 0
[cgt]gggtaaa|tttaccc[acg] 3
a[act]ggtaaa|tttacc[agt]t 9
ag[act]gtaaa|tttac[agt]ct 8
agg[act]taaa|ttta[agt]cct 10
aggg[acg]aaa|ttt[cgt]ccct 3
agggt[cgt]aa|tt[acg]accct 4
agggta[cgt]a|t[acg]taccct 3
agggtaa[cgt]|[acg]ttaccct 5

101745
100000
133640
67 changes: 67 additions & 0 deletions examples/shootout-regex-dna.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi

extern crate regex;

use std::io::{self, Read};
use std::sync::Arc;
use std::thread;

// Compiles `$re` into a `::regex::Regex` at runtime, panicking (via `unwrap`) if construction fails.
macro_rules! regex { ($re:expr) => { ::regex::Regex::new($re).unwrap() } }

/// Entry point of the regex-dna example.
///
/// Reads all of stdin, removes every span from `>` through the end of its
/// line as well as every remaining newline, counts matches of nine pattern
/// variants on background threads, applies eleven single-letter
/// substitutions in order on the main thread, and finally prints each
/// variant with its match count followed by the three recorded lengths.
fn main() {
    let mut text = String::with_capacity(10 * (1 << 20));
    io::stdin().read_to_string(&mut text).unwrap();
    let raw_len = text.len();

    // Strip `>`-prefixed spans (up to and including their newline) and all
    // other newlines.
    text = regex!(">[^\n]*\n|\n").replace_all(&text, "");
    let clean_len = text.len();
    // Worker threads read from a shared copy; the original stays with the
    // main thread so it can be rewritten by the substitutions below.
    let shared = Arc::new(text.clone());

    let patterns = vec![
        regex!("agggtaaa|tttaccct"),
        regex!("[cgt]gggtaaa|tttaccc[acg]"),
        regex!("a[act]ggtaaa|tttacc[agt]t"),
        regex!("ag[act]gtaaa|tttac[agt]ct"),
        regex!("agg[act]taaa|ttta[agt]cct"),
        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
        regex!("agggt[cgt]aa|tt[acg]accct"),
        regex!("agggta[cgt]a|t[acg]taccct"),
        regex!("agggtaa[cgt]|[acg]ttaccct"),
    ];
    // Spawn one counting thread per pattern. Collecting into a `Vec` starts
    // every thread now, before the substitution pass begins.
    let pending: Vec<_> = patterns
        .into_iter()
        .map(|re| {
            let haystack = shared.clone();
            let label = re.to_string();
            let handle = thread::spawn(move || re.find_iter(&haystack).count());
            (label, handle)
        })
        .collect();

    let rewrites = vec![
        (regex!("B"), "(c|g|t)"),
        (regex!("D"), "(a|g|t)"),
        (regex!("H"), "(a|c|t)"),
        (regex!("K"), "(g|t)"),
        (regex!("M"), "(a|c)"),
        (regex!("N"), "(a|c|g|t)"),
        (regex!("R"), "(a|g)"),
        (regex!("S"), "(c|g)"),
        (regex!("V"), "(a|c|g)"),
        (regex!("W"), "(a|t)"),
        (regex!("Y"), "(c|t)"),
    ];
    // Thread the sequence through each substitution in list order; only the
    // length of the final result is reported.
    let final_len = rewrites
        .into_iter()
        .fold(text, |acc, (re, sub)| re.replace_all(&acc, sub))
        .len();

    // Join the workers in spawn order so the output order matches the
    // pattern list.
    for (label, handle) in pending {
        println!("{} {}", label, handle.join().unwrap());
    }
    println!("\n{}\n{}\n{}", raw_len, clean_len, final_len);
}
4 changes: 0 additions & 4 deletions regex_macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ plugin = true
path = "tests/test_native.rs"
name = "all"

[[test]]
path = "benches/shootout-regex-dna.rs"
name = "shootout_regex_dna"

[[bench]]
name = "all"
path = "benches/bench_native.rs"
Expand Down
11 changes: 11 additions & 0 deletions regex_macros/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ fn match_class_in_range(b: &mut Bencher) {
bench_assert_match(b, re, &text);
}

/// Benchmarks matching the `\pL` class against a haystack made of twenty
/// repetitions of `☃5☃5` followed by a single trailing `a`.
#[bench]
fn match_class_unicode(b: &mut Bencher) {
    let re = regex!(r"\pL");
    // Build the haystack in two steps: the repeated prefix, then the final
    // `a` appended at the very end.
    let mut text: String = repeat("☃5☃5").take(20).collect();
    text.push('a');
    bench_assert_match(b, re, &text);
}

#[bench]
fn replace_all(b: &mut Bencher) {
let re = regex!("[cjrw]");
Expand Down Expand Up @@ -171,15 +178,19 @@ fn gen_text(n: usize) -> String {
// Throughput benchmarks: one `throughput!` invocation per pattern generator
// (easy0, easy1, medium, hard) and size argument (32, 1<<10, 32<<10, 1<<20).
// NOTE(review): the size is presumably the haystack length in bytes — confirm
// against the `throughput!` macro definition, which is not shown here.
throughput!(easy0_32, easy0(), 32);
throughput!(easy0_1K, easy0(), 1<<10);
throughput!(easy0_32K, easy0(), 32<<10);
throughput!(easy0_1MB, easy0(), 1<<20);

throughput!(easy1_32, easy1(), 32);
throughput!(easy1_1K, easy1(), 1<<10);
throughput!(easy1_32K, easy1(), 32<<10);
throughput!(easy1_1MB, easy1(), 1<<20);

throughput!(medium_32, medium(), 32);
throughput!(medium_1K, medium(), 1<<10);
throughput!(medium_32K,medium(), 32<<10);
throughput!(medium_1MB, medium(), 1<<20);

throughput!(hard_32, hard(), 32);
throughput!(hard_1K, hard(), 1<<10);
throughput!(hard_32K,hard(), 32<<10);
throughput!(hard_1MB, hard(), 1<<20);
Loading