diff --git a/.travis.yml b/.travis.yml
index f62594ea27..0e28eb4339 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ sudo: false
 script:
   - cargo build --verbose
   - cargo test --verbose
+  - ./run-shootout-test
   - |
     [ $TRAVIS_RUST_VERSION != nightly ] || (
       cargo test --verbose --features pattern &&
diff --git a/Cargo.toml b/Cargo.toml
index aaaeb98567..24dbf30803 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,15 @@ An implementation of regular expressions for Rust.
 
 [[test]]
 path = "regex_macros/tests/test_dynamic.rs"
-name = "all"
+name = "dynamic"
+
+[[test]]
+path = "regex_macros/tests/test_dynamic_nfa.rs"
+name = "dynamic_nfa"
+
+[[test]]
+path = "regex_macros/tests/test_dynamic_backtrack.rs"
+name = "dynamic_backtrack"
 
 [[bench]]
 name = "all"
@@ -22,6 +30,8 @@ test = false
 bench = true
 
 [dependencies]
+aho-corasick = "0.1"
+memchr = "0.1"
 regex-syntax = { path = "regex-syntax", version = "0.1" }
 
 [dev-dependencies]
diff --git a/README.md b/README.md
index 98673988a2..1065b383f8 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@ A Rust library for parsing, compiling, and executing regular expressions.
 
 [Documentation](http://doc.rust-lang.org/regex)
 
+
 ## Usage
 
 Add this to your `Cargo.toml`:
@@ -23,6 +24,7 @@ and this to your crate root:
 extern crate regex;
 ```
 
+
 # License
 
 `regex` is primarily distributed under the terms of both the MIT license and
diff --git a/regex_macros/benches/regexdna-input.txt b/examples/regexdna-input.txt
similarity index 100%
rename from regex_macros/benches/regexdna-input.txt
rename to examples/regexdna-input.txt
diff --git a/examples/regexdna-output.txt b/examples/regexdna-output.txt
new file mode 100644
index 0000000000..d36baa5be8
--- /dev/null
+++ b/examples/regexdna-output.txt
@@ -0,0 +1,13 @@
+agggtaaa|tttaccct 0
+[cgt]gggtaaa|tttaccc[acg] 3
+a[act]ggtaaa|tttacc[agt]t 9
+ag[act]gtaaa|tttac[agt]ct 8
+agg[act]taaa|ttta[agt]cct 10
+aggg[acg]aaa|ttt[cgt]ccct 3
+agggt[cgt]aa|tt[acg]accct 4
+agggta[cgt]a|t[acg]taccct 3
+agggtaa[cgt]|[acg]ttaccct 5
+
+101745
+100000
+133640
diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs
new file mode 100644
index 0000000000..304e27a54f
--- /dev/null
+++ b/examples/shootout-regex-dna.rs
@@ -0,0 +1,67 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::thread;
+
+macro_rules! regex { ($re:expr) => { ::regex::Regex::new($re).unwrap() } } // compiles at runtime; panics if compilation fails
+
+fn main() { // Reads a sequence from stdin, counts each variant's matches, prints counts and three lengths.
+    let mut seq = String::with_capacity(10 * (1 << 20)); // reserve 10 MiB up front
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len(); // byte length of the raw input
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, ""); // delete every `>`-to-end-of-line span and every other newline
+    let clen = seq.len(); // byte length after cleaning
+    let seq_arc = Arc::new(seq.clone()); // separate copy shared by the counting threads; `seq` itself is rewritten below
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let mut counts = vec![]; // (pattern text, thread handle) pairs, in `variants` order
+    for variant in variants {
+        let seq = seq_arc.clone(); // clones the Arc handle, not the underlying String
+        let restr = variant.to_string(); // keep the pattern text for the report; `variant` moves into the thread
+        let future = thread::spawn(move || variant.find_iter(&seq).count()); // count matches on a new thread
+        counts.push((restr, future));
+    }
+
+    let substs = vec![ // (pattern, replacement) pairs, applied in this order on the main thread
+        (regex!("B"), "(c|g|t)"),
+        (regex!("D"), "(a|g|t)"),
+        (regex!("H"), "(a|c|t)"),
+        (regex!("K"), "(g|t)"),
+        (regex!("M"), "(a|c)"),
+        (regex!("N"), "(a|c|g|t)"),
+        (regex!("R"), "(a|g)"),
+        (regex!("S"), "(c|g)"),
+        (regex!("V"), "(a|c|g)"),
+        (regex!("W"), "(a|t)"),
+        (regex!("Y"), "(c|t)"),
+    ];
+    let mut seq = seq; // shadowing rebind of the cleaned sequence (the loop-local `seq` above is out of scope)
+    for (re, replacement) in substs.into_iter() {
+        seq = re.replace_all(&seq, replacement); // result of each pass replaces `seq`
+    }
+    let rlen = seq.len(); // byte length after all substitutions
+
+    for (variant, count) in counts {
+        println!("{} {}", variant, count.join().unwrap()); // waits for that thread; panics if the thread panicked
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, rlen); // blank line, then raw / cleaned / substituted lengths
+}
diff --git a/regex_macros/Cargo.toml b/regex_macros/Cargo.toml
index 49eb9c7bf3..9ad09cc7b3 100644
--- a/regex_macros/Cargo.toml
+++ b/regex_macros/Cargo.toml
@@ -17,10 +17,6 @@ plugin = true
 path = "tests/test_native.rs"
 name = "all"
 
-[[test]]
-path = "benches/shootout-regex-dna.rs"
-name = "shootout_regex_dna"
-
 [[bench]]
 name = "all"
 path = "benches/bench_native.rs"
diff --git a/regex_macros/benches/bench.rs b/regex_macros/benches/bench.rs
index 1a1e2379b1..9cb0c809a1 100644
--- a/regex_macros/benches/bench.rs
+++ b/regex_macros/benches/bench.rs
@@ -59,6 +59,13 @@ fn match_class_in_range(b: &mut Bencher) {
     bench_assert_match(b, re, &text);
 }
 
+#[bench]
+fn match_class_unicode(b: &mut Bencher) { // Benchmarks the Unicode letter class `\pL` against mostly non-letter text.
+    let re = regex!(r"\pL");
+    let text = format!("{}a", repeat("☃5☃5").take(20).collect::<String>()); // "☃5☃5" x 20, then the only letter, "a"
+    bench_assert_match(b, re, &text); // helper defined outside this hunk; presumably asserts a match per iteration
+}
+
 #[bench]
 fn replace_all(b: &mut Bencher) {
     let re = regex!("[cjrw]");
@@ -171,15 +178,19 @@ fn gen_text(n: usize) -> String {
 throughput!(easy0_32, easy0(), 32);
 throughput!(easy0_1K, easy0(), 1<<10);
 throughput!(easy0_32K, easy0(), 32<<10);
+throughput!(easy0_1MB, easy0(), 1<<20);
 
 throughput!(easy1_32, easy1(), 32);
 throughput!(easy1_1K, easy1(), 1<<10);
 throughput!(easy1_32K, easy1(), 32<<10);
+throughput!(easy1_1MB, easy1(), 1<<20);
 
 throughput!(medium_32, medium(), 32);
 throughput!(medium_1K, medium(), 1<<10);
 throughput!(medium_32K,medium(), 32<<10);
+throughput!(medium_1MB, medium(), 1<<20);
 
 throughput!(hard_32, hard(), 32);
 throughput!(hard_1K, hard(), 1<<10);
 throughput!(hard_32K,hard(), 32<<10);
+throughput!(hard_1MB, hard(), 1<<20);
diff --git a/regex_macros/benches/random.txt b/regex_macros/benches/random.txt
new file mode 100644
index 0000000000..dfae5cd142
--- /dev/null
+++ b/regex_macros/benches/random.txt
@@ -0,0 +1,513 @@
+
+mnxnsynfvuugtbxsxbfxwreuspglnplefzwsp
+tacfqcwnmodnmgnyiuvqoco
+z
+
+qjuozfkexn
+zoaxzncje
+sldhqtmgxzyurfyzwazmmu
+bbeuv
+mzsrihycwcb
+xzfqozfmlnpmrzpxxxytqs
+xrg
+mcplby
+nmslhfgjowhzfxsvyddydnsyehdskbydbjksqtpet
+indvfw
+bvjvvw
+
+pddufodyqtyixbndtumndyz
+xjjhtuvmsxhuwqulqtjhqrdqrmtbcphvyuqllocrnkpfv
+zemshhz
+wss
+xewlrxfmgxnwgphcgefa
+mbgsgbzrtthxweimcqzcaaheurdmd
+osqefupespvh
+z
+tvvlakwzwjbrgjzfgubsmmonav
+pjdskxcfgapsm
+zqktqgkrcdrlskx
+zwwfebhguskho
+zlvvw
+czwm
+gojnpmboehlsazbexjjnuscqftrfufngygjdxcydib
+d
+afigycivicnknfxl
+ljuwuopctiftfwctxecwipjnljyef
+jonwbkodomzhqvlf
+jdkizhognqsdogunwedjsmsdzho
+zxvni
+oynfjf
+muvokjuqz
+azuwrwtuxzfopwrcex
+ixrjinlvxjmn
+blaegnmbhsgsbmebwazaeguugtkowexgnqtbfkldadddv
+tzabyoftyov
+ctbtqbzscxzviuvcigwuwusrdro
+ljynr
+gnnnyyxslrhsbj
+hhzlw
+hijalf
+rxlfqk
+mhaofforwznvmcgplinludpgkucpa
+gvvxsqqfmu
+xxqhoyosixjfhjuxpv
+faadjpvamjekreepizurntvwdynozfawsfawyms
+
+lcbutr
+aqyxvpozkjrecrkl
+lfmochahrr
+ptqyomjlwo
+vcmslulznx
+lmlsskcihrmxauztuarydlp
+beiqsrfnmvmlmybmwpektjbikvpggthpabqsgmjhnthvysuhwbigillugjsp
+dfsuegseffwcsnvsrqedytblbpzbfeyfsq
+kypvqctrkuds
+ylqeduokzgdqaxelhftxnxbidu
+bprzyayfopxdsmfhhfqowa
+ymiutdtlfaaxpbtaeslv
+ggago
+
+owpbicekdeykzfgcbgzobdvvrtetvcv
+xsrlgingstiez
+gyncqvq
+xasohmeiwyscpehctmzmsnjklg
+xsudghakxlw
+dzqlfptjogzpkvwuticcyugnyopypuqqc
+wlxshxbhdvuherumoppcc
+
+znyaptivzncvkpeyeipynqefjxjjcsgfqbnezeebtowdrbjaqjlbxwvyikrmxjwoxngqgvfpbniftnmszuxg
+umwpwwyvufy
+pallkjtnrmtauqxauewgygwkjjwebbkabhtxticxmxfujpxlrpzlrozfslkzfdsswlmmsbdgjwmjnummk
+dhsxylejzityahtqqzmohrpzjprrsraztpnuagtyzfjdekthvdogfidksrdppr
+ybc
+fyukknoqfnkllkwflwempjijxgo
+dltvlau
+rhvrvlwsribfctuzodfqkdczfzxnetqqzflnhiyl
+goxmcasmq
+wljbhwkpahdotqhhrbhqzijv
+lszewkgdmkezvgmbmllhpksdkoiwgkvqjmurshrptlctqsosuurndcuzjfwherotv
+dudxxihygxblhgchbgzyzffb
+eht
+fvwxvqoltdcsd
+rkuig
+e
+axhsacsmnicugul
+rubtdlhjqndxdzzwfnkuzy
+swxteuyxxsktkjgv
+hzwwodlqaq
+vxgecev
+qnwla
+vdxjuzpyoqhpmuunyffptopmeauhycs
+dkzo
+awrfzatzohslgvqlaezepmli
+qgxatixvpkkhvkumbwmwcagtgyfljdok
+amdnzstpvcqj
+xsrvwvhjirzfgkessve
+qezwbfltfbikbmoasvoflozsjhrljnszqiciuqmflrlqowwkoevuumh
+babskcvavmtvsxqsewirucwzajjcfcqwsydydqo
+ywfurpsl
+edacsjjkjjewkxfoh
+dcgkfpcjezurnuhiatrczcp
+xsatnimwbcciu
+grzmbrsvvcyigcbmcqfwiiknrohveubhyijxeyzfm
+kqyewccgcqrrrznwxmoztlyseagbpyho
+najju
+nis
+awgzdvfjkzlrsjcqfeacx
+oisuflfigrjaex
+desbdulyuwqxuxianyypybxwlql
+ekmqgspvqpftpwswayh
+egbyj
+fznzprhvnnwcxgcc
+wfdsueieosmugirxbymbpmfrspvrktjzguxm
+qkjrufshwnfwwpbhukdjlaqvljlgubmqmhnha
+hwqpudgnblhlxppbrmbznotteivuzguuwlhtkytky
+w
+yofkyzbpg
+cenolnfnllkvhikrpttcxgqxmufvorekjruyjxmr
+
+hyexmpjijgzumawp
+cdbevdilgopbzlo
+fivelagckslkugdxprjxkylizewcptwxfhomzuituujixchadmnjoktnqa
+csojvlinzmmkkfzqueamnuwkanzdzsavgohposbuoamoevehqrmcxdsuyelvvctoejzoertqormhaaxwofvjzekwt
+sbkghhnhutrvwtyjaxndzyjamrhx
+jjyqy
+majwbnrhveuhrsbbbjrwpwuplifeseylqh
+wyvutpxnkrnkuxxetjkkifpqb
+dyzucmbcvgnjeecm
+hz
+uhnuipthxrzkqluosvk
+lwqqzsdwiwvwaqfwlvubadlyizlo
+jbd
+oyzjeu
+kydjkbsqxnbfiuesc
+smeubjqrcxdvhsabzceyglqjzbfmoacmwvwjbhhxbr
+uabipgecujfdfxpmdzrscdyvefizabgspqjrrkmgjt
+xgvdgzryz
+lw
+uimob
+ifhn
+bqph
+ole
+g
+wt
+k
+yslzrkwkundxfdibwqvucemepqxlmlpyngabbeciuzhptpjdetyngrtxrdtzmvq
+ccwapidp
+
+bwvrgvmtshevrophy
+ni
+fdkplu
+mdykey
+i
+rhsrenoetdggpjb
+djmkplpeabsholx
+judxtub
+fooakqwvocvpcrvxqhvtmpvhkrecy
+uuxscjillynilbkrgt
+evtinrmilniguarqritpeipwochmdw
+sxaqzjybydyvnmmjtdcgkjnqfcklbfpkdfyewgcukqoiegyfp
+kg
+ovrwieqhy
+jcxqtkerzjwhs
+xeonglszbgypafhmqcaseimzjgebkvigbqwsayrnrprtuvhsxyitfqygohgorcdnufbcyvevvgzmjrgjqqquwkszplogx
+zdketqqv
+yebckucwayckeezfvtnavglpjh
+zorkfrwk
+pad
+xqaquxudybwtgixbfktinctfirjfdayh
+rieknj
+ebk
+qzbcfywfdmhsdruhopovemafijbscagllkmhmof
+
+asbsnbddlobwoqatfhkbhhsymzqxjuixwreheugvngmgcuqpkjhhfwpbarqaxrwgwnjbanljlds
+etevdvlc
+lqyjrnmenhn
+k
+tsf
+zczgeavcexh
+jlpuxywtsrvnvluruqhecjca
+ir
+rikrgkmhwaosodkxgcnrexfmdrszhnmutpvwztg
+bffjqovvkemctnsgeh
+weysbhzixiipfithjfsk
+usyzvaiyuhmksfluoirfbnsu
+o
+cgawpdakaszeafdtbdkqtlzkrpnoqomqvuaqcfmzgvfegovtfaonelpv
+izmrcjlk
+xmzemniyrzy
+knqexaafsdlimdamcrprlshq
+qkmqw
+dntgjwsibclvposdwjuklvtejjjdjibgpyynqpgprvvaetshhmvfkcpb
+otvazkrkklrxfotpopyjte
+fghkcnpi
+rulyaihsowvcgbzeiblhuhhfbmncqsuuqcxvseorn
+exirzfmojnxcoqom
+zsgpgtokun
+zvamxfocorganbtlafifwdqmqtsnktbwwtewborq
+
+cxlnaspjqvsitjyzyriqsuorjsrvzqenisprttudxntsbqrpjtdkxnwcwgjyxmgtqljcrmrbrmyvosojzlumcmjcgfjsdehec
+mvx
+mt
+mckr
+teulvroifk
+laaicc
+koufy
+bexmwsvyarnznebdfy
+ripvviosbqijsxnjilwddaqaqemzsdarnxmfooxghoypizwtbueo
+ljycycuqwfnzbambibqdixmkkvwtubepla
+cis
+kcg
+vmbbiuuoamenzepuagpfujevfstqtndjxjchdvycfrrrowochtjdmkklgnhf
+pmorrwguxkvdxpluatagaziin
+
+uwvzbmkmykjkmknzppklx
+pnzxuvsrjunqxercsnvayhykcazdeclomdsasgkpqpiufyfqsxhj
+yceizkddwojgweegcllaagpvrpo
+ek
+kuxxgbezqyxvfaxdwnqdgqsmneijunxzlwxkrs
+ldldbrxmvtjlqxifngmactzqcygkvuteffcmvphevilabgukatqakamjlridznodcvblvlogulmcixxfimh
+iuzjootuywjqklolzzhpeaynydjwtufjavbozxnzckuzdodkvkjfmhinelv
+swlfkcufscfcovmghqwcrtxjukwafoeogrkgubbqgwzm
+gjcylkwgzroubdssuqeykqjcmguso
+fzq
+srfvysoxtlylctp
+
+pbfeiuzwoyixews
+ocvvunfsjnrtklmuuzjojw
+xdjcnrpqhmpmpcwacpcdtmbsczvhllkqapzjuaf
+nfnuvjz
+fwnuiyqpn
+wshxxxpzzxp
+hibrxcfeqca
+
+wqhlllarl
+bukcbojv
+plrytapy
+xm
+vlgfqoyzdczqbbaxjwbjjevjhxgopuqvqcrj
+vpjqfbdnsdxlbuuiqocvrhap
+mgumjbvnnzgnrdru
+gcgzugazxdcamrhczfzhtmdjj
+uislwq
+vooai
+zjuqfmebuzsqngzekyajujkopvayxtdzvugwwucvlsbrnhitfotmhhmgddlzlvqrkcponictrfweuilfjiuoabkfdvpjiqjrrgi
+aptjfhmrnxaq
+hbs
+w
+mwmoxqvucwygunplzvxtxpk
+fgmqmtlorfzytjdzffsosfccnfwugrsrynuej
+rpmpenrhsxoefnblyumjqwvuyszyppnttuyvazjdug
+zdzxraxkroknkmqgvuoqeqdtvclsvvuwmdwzfugcpteohlogxubyoebvrzbqzklvehfcqadtdrkpubfhmokzwyosogepwragcpwxo
+ax
+dz
+de
+
+thvkdmnbdws
+
+ejmubw
+umvwkaubzurf
+wyxtxeluaoox
+wwbioobtgmkebxo
+miglgnafmdarzkeblyjctuayzyoeqnfnbtrcbymdzkzg
+loavxq
+kzhllgsenxlbgdbfzwbg
+yxflogzsohlcycbyzegeubfflouvtuatixhjvicjegltjiy
+jigqfjppafdiarc
+mcnmwtachgearonfcymvjbrnljjxmlzkudvzqsarnfysmxlfrtlvjxwvpdbhvwysnvcdozfcruhjwnucdzakkilmlfgjiolcatpfusm
+
+n
+pdjunfcz
+dc
+edxkkxabsbvmvifiinnoccki
+bc
+gwtwsvorwzfqpz
+exidmexstfflkhi
+s
+s
+c
+wtcjfywlayhpbqktcepoybowtkrmnumqsg
+ozclkgjdmdk
+jmegtbunyexurvfexhqptnqzie
+tkoenpagzwqfawlxvzaijsjqhmg
+swodqfjpdqcbkc
+ujokogocyaygdibgpglecis
+shlmdmgonvpuaxlhrymkxtiytmv
+brhk
+jmsyiuomiywxhegilycjprkyfgojdo
+
+wzdzrgpdiosdsvkcw
+odlnmsfnjrcsnflviwvawybpczdkzvdocpwrmavz
+p
+ubowamlskcqhdxuckrxa
+fawhntiwhmdwkddnahmtajqqazpdygttqivhdiodkcpcwv
+gmxujmmaufmbipaiulhurzkfdg
+eixjhmbaeoybiwk
+kumntgrgiofcmujlzbcopuobambsw
+mnjkqiyb
+iktwnsnv
+hfuzcl
+tqiyqvagbqgtowpjbedgjot
+dfemvamelxadkztogliizdtsddoboafawficudlefo
+raecmxiiibljryswntpfed
+mbwrtsebkeegw
+x
+epp
+he
+
+vnztrswhiusokqdkmsnpuswucvfhcthjbtam
+baxlwidsgbdpzvnlj
+tcbjjoadrzo
+aiidahyllzzsg
+
+igebuubweicbssgddpmqxunrawavuglmpxrtkqsvjjtscibqiejjfgfnovokodmqcqitlteiakooupvzkwucucrfdzjvjbqbkgutoybmpfvhbutigdxhfiqfplyciz
+cnrhbjdnjftwfwlwzrdkwhajgsizsi
+qfntnt
+okqyfnbresp
+asyg
+mjqdkdyggdxzwuzglays
+h
+ifaqcazoy
+fol
+vvsusbnugduxsceozmsarbp
+epjwtorx
+bwiuxxiyc
+cw
+bwogruhctwkfvbexjnwircykxyzjmats
+kygiochfwlpsvmxcgmtjrgvfdptd
+q
+qmpqe
+
+z
+jghffhqfoecmszunhxmzmzhlmbrvjabhrkihgjmvckhkfpaygjkg
+
+kfiyfgounmhlvhupswqdgws
+ezzdpyqucqoocsdcjtruqpokldfkmjhqzoynirybsifyaxnaxppthjoqy
+nwetlgzwrhkhtuubbkbepuhbllxspvagxrqokwnrhkbwdwtp
+hlazomrhqogoaxypqaszwfxxmutvbpuuvpdffuqskcbzlwyzcssnflkwiydoveyxjnzllzhyozbsa
+hwnitkwbxcyibbqsluuqywbk
+
+ozpfjsdrc
+yoepefuy
+lvmspzepnetra
+genbrcrmuqfvkaouvuymoxhcxotjjhk
+pcshyqgbmqdubsdajnyfqvxkqvywffzn
+ukhcbyzwslqeq
+otfrmcbnhbyffxqregqoufdxucjunwdhlqqeiiawbxlpqeyzzopfungrryqdykgizrhqodirvazm
+dhpfhzyq
+cloz
+eduupqifolfekve
+qiec
+ishnjukvomntmdthlkajxpiwk
+y
+axl
+tmyskjqkjsvumizlal
+wvvolwewsfxhhdieuagdcuhwsgqvswpbkdkpxskloalmr
+ryfmhe
+z
+mmbpgsyrfvzdatbjrjhuipwt
+llzwizmmuulgwocowwmugtaoewkhnqxparvtynlffffdfcocdbba
+
+pyczkzbmcgrdnxnmezsx
+gsqe
+mcocxcolcynhpecstsn
+opnpplkccobjuhtbhirpzfxuktmpsiwbvsgiaavvdge
+wpaldxzasnrbvtugjwytvtfttrh
+zxecurevkjiyxy
+wtnovebcmglkktic
+fdpwfgvlvovxrwh
+bmwgdullzy
+uzwhagxinwqifxjbcntqzqoxkmpqxhe
+jrfizsnwxwnnhb
+inapddlahrp
+
+ndtvkceobe
+buskgghihdjmjlwfc
+j
+rkvffxwtmzoeruhlsurwtnuh
+cbvkhfepkdishfpqvijzrpleuy
+jzdpxjhcgqnybssfegvrnpgyehdqpgjwudbwrjbavp
+xzzvgqdrdwajmdmj
+vfatwsxvwfdbdhnijdujoyotwvwjipuuetichcfmvgrsnjpqaaezjtkvc
+lbfoqgfshrtwgdqufwnfuitdrjydqctqixlzufkdbp
+zgau
+qefdpmtkecvtj
+kuphldkvnzdtpd
+dti
+fpd
+gfrliyegxsb
+i
+qsddsrmkyfgzrjeqnitmnypbcakh
+vfbvbrpuogzhzrbmklvhji
+nkz
+xlufbaoblbmeub
+alwuzxzmobwdukvwnkiwmuqhuxfhevogdnqtmxjptqznrk
+cngpoty
+
+ms
+qvenfg
+dmeaffm
+jycfgnanbmoamhmarkmjcagbp
+ysqmbhopgx
+jczbzgwedsp
+
+zxzwjrxcwdtleizjlvifjwgxiibezwxhtzywqdi
+mtgnlu
+xboxirdchurkfnklnpkapnqfxnhrxyseiujrznjm
+
+zm
+atddskbghcahlhql
+szshwzmmvu
+befdtpouamwhiisyybispkchpjhownatawjfbx
+
+ennkzbrlygd
+zbt
+upphzpdwzmlhhhbqvjsfmbnrar
+ddcs
+ipbxgzyudjyongtcyygncojdufnufqpdppgvq
+gc
+isu
+foa
+wf
+jdlvqxgfbowhohhyyngbcs
+zjuwjyucdwblatsnywaaoftlcamfbcnw
+lzrioesuhoeevczuwrnltmkahfwiu
+uicggfbddqltnjyxfltbnaekncnyxsit
+zkxsqkqrwrzrxgxbsgxatybfr
+
+ptvmfyxdcglbfipcguqthjygzqnpqssscukzawynidtchjrrxwuxifoe
+w
+ohu
+vg
+zagpowezvbniybgold
+lhqseqcxteiqtgnpanpvrmvvlltxh
+mtfnxn
+wyodtg
+
+rawpbgtpbaktqzmmpzxmrlwpvvmdsl
+widcfbirvswraukbmkhf
+vplrueuxomjkqrtjgyxjdkexttzyozawyq
+hrpbahllznvmjudzxpbbv
+tlavfrxygjfipkupwnbacltcfepeg
+icu
+otxcu
+aewazy
+hl
+
+fmrp
+qaacthwzohenzjr
+xbyebba
+rvkph
+mkhhmh
+swme
+zjmdoypaktglcyzobquunvthcdwegtbywpijxd
+jvkuhnxqc
+gibhqgjojsxt
+bodbktzomiqujtbstqiyquwvqgufphqstenxvddkvtdh
+bpusrxkfi
+zgp
+pmxvgamydyakituvvsucsuidrlznupcsinltmrahulhepxmhoqtfvpjkxzhrrinncuh
+jzgkjjhjqykzelaszvcwvvwbnzsxdeaerfnaravk
+ynanrqyrxo
+zsmuxofullob
+brklgrcqefdyoczy
+qkpls
+snhqumae
+iqdtzjadzzvnqvdvjfsaf
+nfqfdqiramueblxkaqxbbkxwywzgdbndjjiqk
+tc
+kp
+cpuckbjsxhtxmomfesgxdpz
+oseif
+ybhxbvyxrpkrexrhjzoaxxohrhsniewsrktjnaztn
+ggelspdzhzbchruhbjbjidgjwdlhdycetqaswh
+jkgivsngygkbqtlmoj
+dwpnanfvitxg
+ospxbwxp
+wgvmvrnjescemdoiralbkvemalifxnyhrbdgodml
+hjtsnkzknkplbzsiwmneefdkihnhsamjsrxggclyjqgpqltizi
+
+
+sykgbuypwwhweab
+nvdkkkskmtiwpoerkon
+sx
+sbyflwwiqylbskdlxesmylpaz
+dnwcjenaluwesyywfaezznwkdwpoesxpu
+kie
+dslccwfryol
+gfhomgfn
+zprjtfqvkotktzidmoyrivall
+bunvsqkysdelozemnjoeqfolruulpbipm
+ullyzfahpkhkja
+hwd
+kvyqtprpuulgsk
+zotbkcadnxmfvqmtlbxalhughceyfcibtzzj
+vvpjbgxygl
+hpic
+mhrqd
+dv
+thehuzdbaacoidjoljbysnqwrrxxplrdznmgiukkvjqbopb
+moszjt
+rmtbunktkywqirveeqfa
+kse
+wbfflnatgzobjrxghjgvcsyxoruenxhyomutbptswjajawqjpqafpdcstkiyjuilimecgejpqmyciolgcmdpcstzdozbmnza
diff --git a/regex_macros/benches/shootout-regex-dna.rs b/regex_macros/benches/shootout-regex-dna.rs
deleted file mode 100644
index 3de583451f..0000000000
--- a/regex_macros/benches/shootout-regex-dna.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-// The Computer Language Benchmarks Game
-// http://benchmarksgame.alioth.debian.org/
-//
-// contributed by the Rust Project Developers
-
-// Copyright (c) 2014 The Rust Project Developers
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// - Redistributions of source code must retain the above copyright
-//   notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-//   notice, this list of conditions and the following disclaimer in
-//   the documentation and/or other materials provided with the
-//   distribution.
-//
-// - Neither the name of "The Computer Language Benchmarks Game" nor
-//   the name of "The Computer Language Shootout Benchmarks" nor the
-//   names of its contributors may be used to endorse or promote
-//   products derived from this software without specific prior
-//   written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#![feature(plugin, scoped)]
-#![plugin(regex_macros)]
-
-extern crate regex;
-
-use std::io::{self, Read};
-use std::sync::Arc;
-use std::thread;
-use regex::NoExpand;
-
-#[test]
-fn check() {
-    static ANSWER: &'static str = "\
-agggtaaa|tttaccct 0
-[cgt]gggtaaa|tttaccc[acg] 3
-a[act]ggtaaa|tttacc[agt]t 9
-ag[act]gtaaa|tttac[agt]ct 8
-agg[act]taaa|ttta[agt]cct 10
-aggg[acg]aaa|ttt[cgt]ccct 3
-agggt[cgt]aa|tt[acg]accct 4
-agggta[cgt]a|t[acg]taccct 3
-agggtaa[cgt]|[acg]ttaccct 5
-
-101745
-100000
-133640";
-    static SEQ: &'static str = include_str!("regexdna-input.txt");
-    let got = run(SEQ.to_string()).connect("\n");
-    assert_eq!(ANSWER, got);
-}
-
-#[allow(dead_code)]
-fn main() {
-    let mut input = String::new();
-    io::stdin().read_to_string(&mut input).unwrap();
-    println!("{}", run(input).connect("\n"));
-}
-
-fn run(mut seq: String) -> Vec<String> {
-    let ilen = seq.len();
-
-    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, NoExpand(""));
-    let seq_arc = Arc::new(seq.clone()); // copy before it moves
-    let clen = seq.len();
-
-    let seqlen = thread::scoped(move|| {
-        let substs = vec![
-            (regex!("B"), "(c|g|t)"),
-            (regex!("D"), "(a|g|t)"),
-            (regex!("H"), "(a|c|t)"),
-            (regex!("K"), "(g|t)"),
-            (regex!("M"), "(a|c)"),
-            (regex!("N"), "(a|c|g|t)"),
-            (regex!("R"), "(a|g)"),
-            (regex!("S"), "(c|g)"),
-            (regex!("V"), "(a|c|g)"),
-            (regex!("W"), "(a|t)"),
-            (regex!("Y"), "(c|t)"),
-        ];
-        let mut seq = seq;
-        for (re, replacement) in substs.into_iter() {
-            seq = re.replace_all(&seq, NoExpand(replacement));
-        }
-        seq.len()
-    });
-
-    let variants = vec![
-        regex!("agggtaaa|tttaccct"),
-        regex!("[cgt]gggtaaa|tttaccc[acg]"),
-        regex!("a[act]ggtaaa|tttacc[agt]t"),
-        regex!("ag[act]gtaaa|tttac[agt]ct"),
-        regex!("agg[act]taaa|ttta[agt]cct"),
-        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
-        regex!("agggt[cgt]aa|tt[acg]accct"),
-        regex!("agggta[cgt]a|t[acg]taccct"),
-        regex!("agggtaa[cgt]|[acg]ttaccct"),
-    ];
-    let (mut variant_strs, mut counts) = (vec!(), vec!());
-    for variant in variants.into_iter() {
-        let seq_arc_copy = seq_arc.clone();
-        variant_strs.push(variant.to_string());
-        counts.push(thread::scoped(move|| {
-            variant.find_iter(&seq_arc_copy).count()
-        }));
-    }
-
-    let mut olines = Vec::new();
-    for (variant, count) in variant_strs.iter().zip(counts.into_iter()) {
-        olines.push(format!("{} {}", variant, count.join()));
-    }
-    olines.push("".to_string());
-    olines.push(format!("{}", ilen));
-    olines.push(format!("{}", clen));
-    olines.push(format!("{}", seqlen.join()));
-    olines
-}
diff --git a/regex_macros/src/lib.rs b/regex_macros/src/lib.rs
index cd6c8d232d..83917dc438 100644
--- a/regex_macros/src/lib.rs
+++ b/regex_macros/src/lib.rs
@@ -21,8 +21,6 @@ extern crate regex;
 extern crate syntax;
 extern crate rustc;
 
-use std::rc::Rc;
-
 use syntax::ast;
 use syntax::codemap;
 use syntax::ext::build::AstBuilder;
@@ -35,9 +33,8 @@ use syntax::ptr::P;
 use rustc::plugin::Registry;
 
 use regex::Regex;
-use regex::native::{
-    Inst, Program, Dynamic, ExDynamic, Native,
-    simple_case_fold,
+use regex::internal::{
+    Inst, LookInst, OneChar, CharRanges, Program, Dynamic, Native,
 };
 
 /// For the `regex!` syntax extension. Do not use.
@@ -61,11 +58,6 @@ pub fn plugin_registrar(reg: &mut Registry) {
 ///    direct `match pc { ... }`. The generators can be found in
 ///    `step_insts` and `add_insts`.
 ///
-/// Other more minor changes include eliding code when possible (although this
-/// isn't completely thorough at the moment), and translating character class
-/// matching from using a binary search to a simple `match` expression (see
-/// `match_class`).
-///
 /// It is strongly recommended to read the dynamic implementation in vm.rs
 /// first before trying to understand the code generator. The implementation
 /// strategy is identical and vm.rs has comments and will be easier to follow.
@@ -86,7 +78,7 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
         }
     };
     let prog = match re {
-        Dynamic(ExDynamic { ref prog, .. }) => prog.clone(),
+        Dynamic(ref prog) => prog.clone(),
         Native(_) => unreachable!(),
     };
 
@@ -120,17 +112,12 @@ impl<'a> NfaGen<'a> {
                 None => cx.expr_none(self.sp),
             }
         );
-        let prefix_anchor = match self.prog.insts[1] {
-            Inst::StartText => true,
-            _ => false,
-        };
-        let init_groups = self.vec_expr(0..num_cap_locs,
-                                        &mut |cx, _| cx.expr_none(self.sp));
+        let prefix_anchor = self.prog.anchored_begin;
 
-        let prefix_lit = Rc::new(self.prog.prefix.as_bytes().to_vec());
-        let prefix_bytes = self.cx.expr_lit(self.sp, ast::LitBinary(prefix_lit));
+        // let prefix_lit = Rc::new(self.prog.prefix.as_bytes().to_vec());
+        // let prefix_bytes = self.cx.expr_lit(self.sp, ast::LitBinary(prefix_lit));
 
-        let check_prefix = self.check_prefix();
+        // let check_prefix = self.check_prefix();
         let step_insts = self.step_insts();
         let add_insts = self.add_insts();
         let regex = &*self.original;
@@ -145,120 +132,136 @@ impl<'a> NfaGen<'a> {
 static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
 
 #[allow(dead_code)]
-fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
-            start: usize, end: usize) -> Vec<Option<usize>> {
+fn exec<'t>(
+    mut caps: &mut [Option<usize>],
+    input: &'t str,
+    start: usize,
+) -> bool {
     #![allow(unused_imports)]
     #![allow(unused_mut)]
 
-    use regex::native::{
-        MatchKind, Exists, Location, Submatches,
-        StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
-        CharReader, find_prefix, simple_case_fold,
-    };
+    use regex::internal::{Char, CharInput, InputAt, Input, Inst};
 
+    let input = CharInput::new(input);
+    let at = input.at(start);
     return Nfa {
-        which: which,
         input: input,
-        ic: 0,
-        chars: CharReader::new(input),
-    }.run(start, end);
-
-    type Captures = [Option<usize>; $num_cap_locs];
+        ncaps: caps.len(),
+    }.exec(&mut NfaThreads::new(), &mut caps, at);
 
     struct Nfa<'t> {
-        which: MatchKind,
-        input: &'t str,
-        ic: usize,
-        chars: CharReader<'t>,
+        input: CharInput<'t>,
+        ncaps: usize,
     }
 
     impl<'t> Nfa<'t> {
         #[allow(unused_variables)]
-        fn run(&mut self, start: usize, end: usize) -> Vec<Option<usize>> {
+        fn exec(
+            &mut self,
+            mut q: &mut NfaThreads,
+            mut caps: &mut [Option<usize>],
+            mut at: InputAt,
+        ) -> bool {
             let mut matched = false;
-            let prefix_bytes: &[u8] = $prefix_bytes;
-            let mut clist = Threads::new(self.which);
-            let mut nlist = Threads::new(self.which);
-            let (mut clist, mut nlist) = (&mut clist, &mut nlist);
-
-            let mut groups = $init_groups;
-
-            self.ic = start;
-            let mut next_ic = self.chars.set(start);
-            while self.ic <= end {
+            let (mut clist, mut nlist) = (&mut q.clist, &mut q.nlist);
+            clist.empty(); nlist.empty();
+'LOOP:      loop {
                 if clist.size == 0 {
-                    if matched {
-                        break
-                    }
-
-                    if $prefix_anchor && self.ic != 0 {
-                        break
+                    if matched || (!at.is_beginning() && $prefix_anchor) {
+                        break;
                     }
-
-                    $check_prefix
+                    // TODO: Prefix matching is skipped here for now.
+                    // Prefix matching now uses a DFA, so supporting it will
+                    // probably require encoding that DFA statically.
                 }
                 if clist.size == 0 || (!$prefix_anchor && !matched) {
-                    self.add(&mut clist, 0, &mut groups)
+                    self.add(clist, &mut caps, 0, at);
                 }
-
-                self.ic = next_ic;
-                next_ic = self.chars.advance();
-
+                let at_next = self.input.at(at.next_pos());
                 for i in 0..clist.size {
                     let pc = clist.pc(i);
-                    let step_state = self.step(&mut groups, &mut nlist,
-                                               clist.groups(i), pc);
-                    match step_state {
-                        StepMatchEarlyReturn =>
-                            return vec![Some(0), Some(0)],
-                        StepMatch => { matched = true; break },
-                        StepContinue => {},
+                    let tcaps = clist.caps(i);
+                    if self.step(nlist, caps, tcaps, pc, at, at_next) {
+                        matched = true;
+                        if caps.len() == 0 {
+                            break 'LOOP;
+                        }
+                        break;
                     }
                 }
+                if at.char().is_none() {
+                    break;
+                }
+                at = at_next;
                 ::std::mem::swap(&mut clist, &mut nlist);
                 nlist.empty();
             }
-            match self.which {
-                Exists if matched     => vec![Some(0), Some(0)],
-                Exists                => vec![None, None],
-                Location | Submatches => groups.iter().map(|x| *x).collect(),
-            }
+            matched
         }
 
         // Sometimes `nlist` is never used (for empty regexes).
         #[allow(unused_variables)]
         #[inline]
-        fn step(&self, groups: &mut Captures, nlist: &mut Threads,
-                caps: &mut Captures, pc: usize) -> StepState {
-            $step_insts
-            StepContinue
+        fn step(
+            &self,
+            nlist: &mut Threads,
+            caps: &mut [Option<usize>],
+            thread_caps: &mut [Option<usize>],
+            pc: usize,
+            at: InputAt,
+            at_next: InputAt,
+        ) -> bool {
+            $step_insts;
+            false
         }
 
-        fn add(&self, nlist: &mut Threads, pc: usize,
-               groups: &mut Captures) {
+        fn add(
+            &self,
+            nlist: &mut Threads,
+            thread_caps: &mut [Option<usize>],
+            pc: usize,
+            at: InputAt,
+        ) {
             if nlist.contains(pc) {
-                return
+                return;
             }
+            let ti = nlist.add(pc);
             $add_insts
         }
     }
 
-    struct Thread {
-        pc: usize,
-        groups: Captures,
+    struct NfaThreads {
+        clist: Threads,
+        nlist: Threads,
     }
 
     struct Threads {
-        which: MatchKind,
-        queue: [Thread; $num_insts],
+        dense: [Thread; $num_insts],
         sparse: [usize; $num_insts],
         size: usize,
     }
 
+    struct Thread {
+        pc: usize, // value stored by `Threads::add`; also used there as the index into `sparse`
+        caps: [Option<usize>; $num_cap_locs], // this thread's capture slots, `$num_cap_locs` of them
+    }
+
+    impl NfaThreads {
+        fn new() -> NfaThreads { // both lists are built with `Threads::new()`
+            NfaThreads {
+                clist: Threads::new(),
+                nlist: Threads::new(),
+            }
+        }
+
+        fn swap(&mut self) { // exchanges the `clist` and `nlist` fields; NOTE(review): no caller in the visible hunk
+            ::std::mem::swap(&mut self.clist, &mut self.nlist);
+        }
+    }
+
     impl Threads {
-        fn new(which: MatchKind) -> Threads {
+        fn new() -> Threads {
             Threads {
-                which: which,
                 // These unsafe blocks are used for performance reasons, as it
                 // gives us a zero-cost initialization of a sparse set. The
                 // trick is described in more detail here:
@@ -266,43 +269,30 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
                 // The idea here is to avoid initializing threads that never
                 // need to be initialized, particularly for larger regexs with
                 // a lot of instructions.
-                queue: unsafe { ::std::mem::uninitialized() },
+                dense: unsafe { ::std::mem::uninitialized() },
                 sparse: unsafe { ::std::mem::uninitialized() },
                 size: 0,
             }
         }
 
         #[inline]
-        fn add(&mut self, pc: usize, groups: &Captures) {
-            let t = &mut self.queue[self.size];
-            t.pc = pc;
-            match self.which {
-                Exists => {},
-                Location => {
-                    t.groups[0] = groups[0];
-                    t.groups[1] = groups[1];
-                }
-                Submatches => {
-                    for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
-                        *slot = *val;
-                    }
-                }
-            }
-            self.sparse[pc] = self.size;
+        fn add(&mut self, pc: usize) -> usize {
+            let i = self.size;
+            self.dense[i].pc = pc;
+            self.sparse[pc] = i;
             self.size += 1;
+            i
         }
 
         #[inline]
-        fn add_empty(&mut self, pc: usize) {
-            self.queue[self.size].pc = pc;
-            self.sparse[pc] = self.size;
-            self.size += 1;
+        fn thread(&mut self, i: usize) -> &mut Thread {
+            &mut self.dense[i]
         }
 
         #[inline]
         fn contains(&self, pc: usize) -> bool {
             let s = self.sparse[pc];
-            s < self.size && self.queue[s].pc == pc
+            s < self.size && self.dense[s].pc == pc
         }
 
         #[inline]
@@ -312,17 +302,17 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
 
         #[inline]
         fn pc(&self, i: usize) -> usize {
-            self.queue[i].pc
+            self.dense[i].pc
         }
 
         #[inline]
-        fn groups<'r>(&'r mut self, i: usize) -> &'r mut Captures {
-            &mut self.queue[i].groups
+        fn caps<'r>(&'r mut self, i: usize) -> &'r mut [Option<usize>] {
+            &mut self.dense[i].caps
         }
     }
 }
 
-::regex::native::Native(::regex::native::ExNative {
+::regex::internal::Native(::regex::internal::ExNative {
     original: $regex,
     names: &CAP_NAMES,
     prog: exec,
@@ -336,104 +326,78 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
             let nextpc = pc + 1;
             let body = match *inst {
-                Inst::StartLine => {
+                Inst::EmptyLook(LookInst::StartLine) => {
                     quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if self.chars.is_begin() || self.chars.prev == Some('\n') {
-                            self.add(nlist, $nextpc, &mut *groups)
+                        let prev = self.input.previous_at(at.pos());
+                        if prev.char().is_none() || prev.char() == '\n' {
+                            self.add(nlist, thread_caps, $nextpc, at);
                         }
                     })
                 }
-                Inst::StartText => {
+                Inst::EmptyLook(LookInst::EndLine) => {
                     quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if self.chars.is_begin() {
-                            self.add(nlist, $nextpc, &mut *groups)
+                        if at.char().is_none() || at.char() == '\n' {
+                            self.add(nlist, thread_caps, $nextpc, at);
                         }
                     })
                 }
-                Inst::EndLine => {
+                Inst::EmptyLook(LookInst::StartText) => {
                     quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if self.chars.is_end() || self.chars.cur == Some('\n') {
-                            self.add(nlist, $nextpc, &mut *groups)
+                        let prev = self.input.previous_at(at.pos());
+                        if prev.char().is_none() {
+                            self.add(nlist, thread_caps, $nextpc, at);
                         }
                     })
                 }
-                Inst::EndText => {
+                Inst::EmptyLook(LookInst::EndText) => {
                     quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if self.chars.is_end() {
-                            self.add(nlist, $nextpc, &mut *groups)
+                        if at.char().is_none() {
+                            self.add(nlist, thread_caps, $nextpc, at);
                         }
                     })
                 }
-                Inst::WordBoundary => {
-                    quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if self.chars.is_word_boundary() {
-                            self.add(nlist, $nextpc, &mut *groups)
-                        }
-                    })
-                }
-                Inst::NotWordBoundary => {
+                Inst::EmptyLook(ref wbty) => {
+                    let m = if *wbty == LookInst::WordBoundary {
+                        quote_expr!(self.cx, { w1 ^ w2 })
+                    } else {
+                        quote_expr!(self.cx, { !(w1 ^ w2) })
+                    };
                     quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        if !self.chars.is_word_boundary() {
-                            self.add(nlist, $nextpc, &mut *groups)
+                        let prev = self.input.previous_at(at.pos());
+                        let w1 = prev.char().is_word_char();
+                        let w2 = at.char().is_word_char();
+                        if $m {
+                            self.add(nlist, thread_caps, $nextpc, at);
                         }
                     })
                 }
-                Inst::Save(slot) => {
-                    let save = quote_expr!(self.cx, {
-                        let old = groups[$slot];
-                        groups[$slot] = Some(self.ic);
-                        self.add(nlist, $nextpc, &mut *groups);
-                        groups[$slot] = old;
-                    });
-                    let add = quote_expr!(self.cx, {
-                        self.add(nlist, $nextpc, &mut *groups);
-                    });
-                    // If this is saving a submatch location but we request
-                    // existence or only full match location, then we can skip
-                    // right over it every time.
-                    if slot > 1 {
-                        quote_expr!(self.cx, {
-                            nlist.add_empty($pc);
-                            match self.which {
-                                Submatches => $save,
-                                Exists | Location => $add,
-                            }
-                        })
+                Inst::Save(slot) => quote_expr!(self.cx, {
+                    if $slot >= self.ncaps {
+                        self.add(nlist, thread_caps, $nextpc, at);
                     } else {
-                        quote_expr!(self.cx, {
-                            nlist.add_empty($pc);
-                            match self.which {
-                                Submatches | Location => $save,
-                                Exists => $add,
-                            }
-                        })
+                        let old = thread_caps[$slot];
+                        thread_caps[$slot] = Some(at.pos());
+                        self.add(nlist, thread_caps, $nextpc, at);
+                        thread_caps[$slot] = old;
                     }
-                }
-                Inst::Jump(to) => {
-                    quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        self.add(nlist, $to, &mut *groups);
-                    })
-                }
-                Inst::Split(x, y) => {
-                    quote_expr!(self.cx, {
-                        nlist.add_empty($pc);
-                        self.add(nlist, $x, &mut *groups);
-                        self.add(nlist, $y, &mut *groups);
-                    })
-                }
+                }),
+                Inst::Jump(to) => quote_expr!(self.cx, {
+                    self.add(nlist, thread_caps, $to, at);
+                }),
+                Inst::Split(x, y) => quote_expr!(self.cx, {
+                    self.add(nlist, thread_caps, $x, at);
+                    self.add(nlist, thread_caps, $y, at);
+                }),
                 // For Match, OneChar, CharClass, Any, AnyNoNL
-                _ => quote_expr!(self.cx, nlist.add($pc, &*groups)),
+                _ => quote_expr!(self.cx, {
+                    let mut t = &mut nlist.thread(ti);
+                    for (slot, val) in t.caps.iter_mut().zip(thread_caps.iter()) {
+                        *slot = *val;
+                    }
+                }),
             };
             self.arm_inst(pc, body)
         }).collect::<Vec<ast::Arm>>();
-
         self.match_insts(arms)
     }
 
@@ -443,77 +407,35 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
             let nextpc = pc + 1;
             let body = match *inst {
-                Inst::Match => {
-                    quote_expr!(self.cx, {
-                        match self.which {
-                            Exists => {
-                                return StepMatchEarlyReturn
-                            }
-                            Location => {
-                                groups[0] = caps[0];
-                                groups[1] = caps[1];
-                                return StepMatch
-                            }
-                            Submatches => {
-                                for (slot, val) in groups.iter_mut().zip(caps.iter()) {
-                                    *slot = *val;
-                                }
-                                return StepMatch
-                            }
-                        }
-                    })
-                }
-                Inst::OneChar { c, casei } => {
-                    if casei {
-                        let upc = simple_case_fold(c);
-                        quote_expr!(self.cx, {
-                            let upc = self.chars.prev.map(simple_case_fold);
-                            if upc == Some($upc) {
-                                self.add(nlist, $nextpc, caps);
-                            }
-                        })
-                    } else {
-                        quote_expr!(self.cx, {
-                            if self.chars.prev == Some($c) {
-                                self.add(nlist, $nextpc, caps);
-                            }
-                        })
+                Inst::Match => quote_expr!(self.cx, {
+                    for (slot, val) in caps.iter_mut().zip(thread_caps.iter()) {
+                        *slot = *val;
                     }
-                }
-                Inst::CharClass(ref cls) => {
-                    let ranges: Vec<(char, char)> =
-                        cls.iter().map(|r| (r.start, r.end)).collect();
-                    let mranges = self.match_class(&ranges);
-                    let get_char =
-                        if cls.is_case_insensitive() {
-                            quote_expr!(
-                                self.cx,
-                                simple_case_fold(self.chars.prev.unwrap()))
-                        } else {
-                            quote_expr!(self.cx, self.chars.prev.unwrap())
-                        };
+                    return true;
+                }),
+                Inst::Char(OneChar { c, casei }) => quote_expr!(self.cx, {
+                    if $c == at.char() || ($casei && $c == at.char().case_fold()) {
+                        self.add(nlist, thread_caps, $nextpc, at_next);
+                    }
+                    return false;
+                }),
+                Inst::Ranges(CharRanges { ref ranges, casei }) => {
+                    let match_class = self.match_class(ranges);
                     quote_expr!(self.cx, {
-                        if self.chars.prev.is_some() {
-                            let c = $get_char;
-                            if $mranges {
-                                self.add(nlist, $nextpc, caps);
-                            }
+                        let mut c = at.char();
+                        if $casei {
+                            c = c.case_fold();
                         }
-                    })
-                }
-                Inst::Any => {
-                    quote_expr!(self.cx, self.add(nlist, $nextpc, caps))
-                }
-                Inst::AnyNoNL => {
-                    quote_expr!(self.cx, {
-                        if self.chars.prev != Some('\n') {
-                            self.add(nlist, $nextpc, caps);
+                        if let Some(c) = c.as_char() {
+                            if $match_class {
+                                self.add(nlist, thread_caps, $nextpc, at_next);
+                            }
                         }
-                        ()
+                        return false;
                     })
                 }
-                // EmptyBegin, EmptyEnd, EmptyWordBoundary, Save, Jump, Split
-                _ => self.empty_block(),
+                // EmptyLook, Save, Jump, Split
+                _ => quote_expr!(self.cx, { return false; }),
             };
             self.arm_inst(pc, body)
         }).collect::<Vec<ast::Arm>>();
@@ -526,13 +448,13 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
     // table).
     fn match_class(&self, ranges: &[(char, char)]) -> P<ast::Expr> {
         let mut arms = ranges.iter().map(|&(start, end)| {
-            let pat = self.cx.pat(self.sp, ast::PatRange(quote_expr!(self.cx, $start),
-                                                         quote_expr!(self.cx, $end)));
+            let pat = self.cx.pat(
+                self.sp, ast::PatRange(
+                    quote_expr!(self.cx, $start), quote_expr!(self.cx, $end)));
             self.cx.arm(self.sp, vec!(pat), quote_expr!(self.cx, true))
         }).collect::<Vec<ast::Arm>>();
 
         arms.push(self.wild_arm_expr(quote_expr!(self.cx, false)));
-
         let match_on = quote_expr!(self.cx, c);
         self.cx.expr_match(self.sp, match_on, arms)
     }
@@ -540,24 +462,24 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
     // Generates code for checking a literal prefix of the search string.
     // The code is only generated if the regex *has* a literal prefix.
     // Otherwise, a no-op is returned.
-    fn check_prefix(&self) -> P<ast::Expr> {
-        if self.prog.prefix.len() == 0 {
-            self.empty_block()
-        } else {
-            quote_expr!(self.cx,
-                if clist.size == 0 {
-                    let haystack = &self.input.as_bytes()[self.ic..];
-                    match find_prefix(prefix_bytes, haystack) {
-                        None => break,
-                        Some(i) => {
-                            self.ic += i;
-                            next_ic = self.chars.set(self.ic);
-                        }
-                    }
-                }
-            )
-        }
-    }
+    // fn check_prefix(&self) -> P<ast::Expr> {
+        // if self.prog.prefixes.len() == 0 {
+            // self.empty_block()
+        // } else {
+            // quote_expr!(self.cx,
+                // if clist.size == 0 {
+                    // let haystack = &self.input.as_bytes()[self.ic..];
+                    // match find_prefix(prefix_bytes, haystack) {
+                        // None => break,
+                        // Some(i) => {
+                            // self.ic += i;
+                            // next_ic = self.chars.set(self.ic);
+                        // }
+                    // }
+                // }
+            // )
+        // }
+    // }
 
     // Builds a `match pc { ... }` expression from a list of arms, specifically
     // for matching the current program counter with an instruction.
@@ -595,7 +517,6 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         }
     }
 
-
     // Converts `xs` to a `[x1, x2, .., xN]` expression by calling `to_expr`
     // on each element in `xs`.
     fn vec_expr<T, It: Iterator<Item=T>>(&self, xs: It,
diff --git a/regex_macros/tests/test_dynamic.rs b/regex_macros/tests/test_dynamic.rs
index 1056adfb29..7fa505a2ae 100644
--- a/regex_macros/tests/test_dynamic.rs
+++ b/regex_macros/tests/test_dynamic.rs
@@ -1,4 +1,4 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -15,14 +15,15 @@ extern crate regex;
 // Due to macro scoping rules, this definition only applies for the modules
 // defined below. Effectively, it allows us to use the same tests for both
 // native and dynamic regexes.
-macro_rules! regex(
-    ($re:expr) => (
-        match ::regex::Regex::new($re) {
-            Ok(re) => re,
-            Err(err) => panic!("{}", err),
-        }
-    );
-);
+//
+// This is also used to test the various matching engines. This one exercises
+// the normal code path which automatically chooses the engine based on the
+// regex and the input. Other dynamic tests explicitly set the engine to use.
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::Regex::with_engine(None, 10 * (1 << 20), $re).unwrap()
+    }
+}
 
 #[cfg(feature = "pattern")]
 macro_rules! searcher_expr { ($e:expr) => ($e) }
diff --git a/regex_macros/tests/test_dynamic_backtrack.rs b/regex_macros/tests/test_dynamic_backtrack.rs
new file mode 100644
index 0000000000..8f5d0dfa9a
--- /dev/null
+++ b/regex_macros/tests/test_dynamic_backtrack.rs
@@ -0,0 +1,27 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(core))]
+
+extern crate regex;
+
+macro_rules! regex {
+    ($re:expr) => {{
+        let e = Some(::regex::internal::MatchEngine::Backtrack);
+        ::regex::Regex::with_engine(e, 10 * (1 << 20), $re).unwrap()
+    }}
+}
+
+#[cfg(feature = "pattern")]
+macro_rules! searcher_expr { ($e:expr) => ($e) }
+#[cfg(not(feature = "pattern"))]
+macro_rules! searcher_expr { ($e:expr) => ({}) }
+
+mod tests;
diff --git a/regex_macros/tests/test_dynamic_nfa.rs b/regex_macros/tests/test_dynamic_nfa.rs
new file mode 100644
index 0000000000..e5da5c879e
--- /dev/null
+++ b/regex_macros/tests/test_dynamic_nfa.rs
@@ -0,0 +1,27 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(core))]
+
+extern crate regex;
+
+macro_rules! regex {
+    ($re:expr) => {{
+        let e = Some(::regex::internal::MatchEngine::Nfa);
+        ::regex::Regex::with_engine(e, 10 * (1 << 20), $re).unwrap()
+    }}
+}
+
+#[cfg(feature = "pattern")]
+macro_rules! searcher_expr { ($e:expr) => ($e) }
+#[cfg(not(feature = "pattern"))]
+macro_rules! searcher_expr { ($e:expr) => ({}) }
+
+mod tests;
diff --git a/run-shootout-test b/run-shootout-test
new file mode 100755
index 0000000000..8fef4e3a5e
--- /dev/null
+++ b/run-shootout-test
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+cargo build --example shootout-regex-dna
+exec diff \
+  ./examples/regexdna-output.txt \
+  <(./target/debug/examples/shootout-regex-dna < ./examples/regexdna-input.txt)
diff --git a/src/backtrack.rs b/src/backtrack.rs
new file mode 100644
index 0000000000..e397b2ebf9
--- /dev/null
+++ b/src/backtrack.rs
@@ -0,0 +1,269 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is the backtracking matching engine. It has the same exact capability
+// as the full NFA simulation, except it is artificially restricted to small
+// regexes on small inputs because of its memory requirements.
+//
+// In particular, this is a *bounded* backtracking engine. It retains worst
+// case linear time by keeping track of the states that it has visited (using a
+// bitmap). Namely, once a state is visited, it is never visited again. Since a
+// state is keyed by `(instruction index, input index)`, we have that its time
+// complexity is `O(mn)`.
+//
+// The backtracking engine can beat out the NFA simulation on small
+// regexes/inputs because it doesn't have to keep track of multiple copies of
+// the capture groups. In benchmarks, the backtracking engine is roughly twice
+// as fast as the full NFA simulation.
+
+use input::{Input, InputAt, CharInput};
+use program::{Inst, InstIdx, Program};
+use re::CaptureIdxs;
+
+type Bits = u32;
+const BIT_SIZE: usize = 32;
+const MAX_PROG_SIZE: usize = 100;
+const MAX_INPUT_SIZE: usize = 256 * (1 << 10);
+
+// Total memory usage in bytes is determined by:
+//
+//   ((len(insts) * (len(input) + 1) + bits - 1) / bits) * (bits / 8)
+//
+// With the above settings, this comes out to ~3.2MB. Mostly these numbers
+// were picked empirically with suspicious benchmarks.
+
+/// A backtracking matching engine.
+#[derive(Debug)]
+pub struct Backtrack<'r, 't, 'c> {
+    prog: &'r Program,
+    input: CharInput<'t>,
+    caps: &'c mut CaptureIdxs,
+    m: BackMachine,
+}
+
+/// Shared cached state between multiple invocations of a backtracking engine
+/// in the same thread.
+///
+/// It is exported so that it can be cached by `program::Program`.
+#[derive(Debug)]
+pub struct BackMachine {
+    jobs: Vec<Job>,
+    visited: Vec<Bits>,
+}
+
+impl BackMachine {
+    /// Create new empty state for the backtracking engine.
+    pub fn new() -> BackMachine {
+        BackMachine {
+            jobs: vec![],
+            visited: vec![],
+        }
+    }
+}
+
+/// A job is an explicit unit of stack space in the backtracking engine.
+///
+/// The "normal" representation is a single state transition, which corresponds
+/// to an NFA state and a character in the input. However, the backtracking
+/// engine must keep track of old capture group values. We use the explicit
+/// stack to do it.
+#[derive(Clone, Copy, Debug)]
+enum Job {
+    Inst { pc: InstIdx, at: InputAt },
+    SaveRestore { slot: usize, old_pos: Option<usize> },
+}
+
+impl<'r, 't, 'c> Backtrack<'r, 't, 'c> {
+    /// Execute the backtracking matching engine.
+    ///
+    /// If there's a match, `exec` returns `true` and populates the given
+    /// captures accordingly.
+    pub fn exec(
+        prog: &'r Program,
+        caps: &mut CaptureIdxs,
+        text: &'t str,
+        start: usize,
+    ) -> bool {
+        let input = CharInput::new(text);
+        let start = input.at(start);
+        let mut b = Backtrack {
+            prog: prog,
+            input: input,
+            caps: caps,
+            // Take the cached machine state; it is put back after the search.
+            m: prog.backtrack.get(),
+        };
+        let matched = b.exec_(start);
+        prog.backtrack.put(b.m);
+        matched
+    }
+
+    /// Returns true iff the given regex and input can be executed by this
+    /// engine with reasonable memory usage.
+    pub fn should_exec(prog: &'r Program, input: &str) -> bool {
+        prog.insts.len() <= MAX_PROG_SIZE && input.len() <= MAX_INPUT_SIZE
+    }
+
+    /// Resets the job stack and the visited set ahead of a new search.
+    fn clear(&mut self) {
+        // Reset the job memory so that we start fresh.
+        self.m.jobs.truncate(0);
+
+        // Now clear the bit state set: work out how many words are needed to
+        // track the visited states, zero every word we already have, and
+        // append zeroed words if the set is too short. Growth is measured
+        // against `len()` (not `capacity()`) since indexing is bounded by the
+        // length and the capacity may exceed what was requested.
+        //
+        // This is all a little circuitous, but doing this unsafely
+        // doesn't seem to have a measurable impact on performance.
+        // (Probably because backtracking is limited to such small
+        // inputs/regexes in the first place.)
+        let visited_len =
+            (self.prog.insts.len() * (self.input.len() + 1) + BIT_SIZE - 1)
+            / BIT_SIZE;
+        for v in &mut self.m.visited {
+            *v = 0;
+        }
+        let cur_visited_len = self.m.visited.len();
+        if visited_len > cur_visited_len {
+            self.m.visited.reserve_exact(visited_len - cur_visited_len);
+            for _ in 0..(visited_len - cur_visited_len) {
+                self.m.visited.push(0);
+            }
+        }
+    }
+
+    fn exec_(&mut self, mut at: InputAt) -> bool {
+        self.clear();
+        if self.prog.anchored_begin {
+            // An anchored program is only attempted from the beginning.
+            if !at.is_beginning() {
+                return false;
+            }
+            return match self.input.prefix_at(&self.prog.prefixes, at) {
+                Some(begin) => self.backtrack(begin),
+                None => false,
+            };
+        }
+        // Unanchored: try successive positions until a match or end of input.
+        loop {
+            if !self.prog.prefixes.is_empty() {
+                match self.input.prefix_at(&self.prog.prefixes, at) {
+                    Some(next) => at = next,
+                    None => return false,
+                }
+            }
+            if self.backtrack(at) {
+                return true;
+            } else if at.char().is_none() {
+                return false;
+            }
+            at = self.input.at(at.next_pos());
+        }
+    }
+
+    // This `inline(always)` seems to result in about a 10-15% increase in
+    // throughput on the `hard` benchmarks (over a standard `inline`). ---AG
+    #[inline(always)]
+    fn backtrack(&mut self, start: InputAt) -> bool {
+        self.push(0, start);
+        while let Some(job) = self.m.jobs.pop() {
+            match job {
+                Job::Inst { pc, at } => {
+                    if self.step(pc, at) {
+                        return true;
+                    }
+                }
+                Job::SaveRestore { slot, old_pos } => {
+                    self.caps[slot] = old_pos;
+                }
+            }
+        }
+        false
+    }
+
+    fn step(&mut self, mut pc: InstIdx, mut at: InputAt) -> bool {
+        use program::Inst::*;
+        loop {
+            // This loop is an optimization to avoid constantly pushing/popping
+            // from the stack. Namely, if we're pushing a job only to run it
+            // next, avoid the push and just mutate `pc` (and possibly `at`)
+            // in place.
+            match self.prog.insts[pc] {
+                Match => return true,
+                Save(slot) => {
+                    if slot < self.caps.len() {
+                        // If this path doesn't work out, then we save the old
+                        // capture index (if one exists) in an alternate
+                        // job. If the next path fails, then the alternate
+                        // job is popped and the old capture index is restored.
+                        let old_pos = self.caps[slot];
+                        self.push_save_restore(slot, old_pos);
+                        self.caps[slot] = Some(at.pos());
+                    }
+                    pc += 1;
+                }
+                Jump(pc2) => pc = pc2,
+                Split(x, y) => {
+                    self.push(y, at);
+                    pc = x;
+                }
+                EmptyLook(ref inst) => {
+                    let prev = self.input.previous_at(at.pos());
+                    if inst.matches(prev.char(), at.char()) {
+                        pc += 1;
+                    } else {
+                        return false;
+                    }
+                }
+                Char(ref inst) => {
+                    if inst.matches(at.char()) {
+                        pc += 1;
+                        at = self.input.at(at.next_pos());
+                    } else {
+                        return false;
+                    }
+                }
+                Ranges(ref inst) => {
+                    if inst.matches(at.char()).is_some() {
+                        pc += 1;
+                        at = self.input.at(at.next_pos());
+                    } else {
+                        return false;
+                    }
+                }
+            }
+            if self.has_visited(pc, at) {
+                return false;
+            }
+        }
+    }
+
+    fn push(&mut self, pc: InstIdx, at: InputAt) {
+        self.m.jobs.push(Job::Inst { pc: pc, at: at });
+    }
+
+    fn push_save_restore(&mut self, slot: usize, old_pos: Option<usize>) {
+        self.m.jobs.push(Job::SaveRestore { slot: slot, old_pos: old_pos });
+    }
+
+    /// Marks state `(pc, at)` as visited. Returns true iff it already was.
+    fn has_visited(&mut self, pc: InstIdx, at: InputAt) -> bool {
+        // Every state owns one bit: word `bit / BIT_SIZE`, masked below.
+        let bit = pc * (self.input.len() + 1) + at.pos();
+        let mask = (1 << (bit & (BIT_SIZE - 1))) as Bits;
+        let word = &mut self.m.visited[bit / BIT_SIZE];
+        let seen = *word & mask != 0;
+        // Setting an already-set bit is a no-op, so mark unconditionally.
+        *word |= mask;
+        seen
+    }
+}
diff --git a/src/char.rs b/src/char.rs
new file mode 100644
index 0000000000..43661717ca
--- /dev/null
+++ b/src/char.rs
@@ -0,0 +1,107 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::char;
+use std::cmp::Ordering;
+use std::fmt;
+use std::u32;
+
+use syntax;
+
+/// An inline representation of `Option<char>`.
+///
+/// An absent character is stored as `u32::MAX`, so ordering it against other
+/// characters needs no case analysis on `Option<char>`.
+///
+/// (The `Option<char>` is not related to encoding. Instead, it is used in the
+/// matching engines to represent the beginning and ending boundaries of the
+/// search text.)
+#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Char(u32);
+
+impl fmt::Debug for Char {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match char::from_u32(self.0) {
+            None => write!(f, "Empty"), // not a valid `char`, e.g. the absent value
+            Some(c) => write!(f, "{:?}", c), // same output as `char`'s own Debug
+        }
+    }
+}
+
+impl Char {
+    /// Returns true iff the character is absent (stored as `u32::MAX`).
+    #[inline]
+    pub fn is_none(self) -> bool { self.0 == u32::MAX }
+
+    /// Returns the length in bytes of the character's UTF-8 encoding.
+    ///
+    /// If the character is absent, then `0` is returned.
+    #[inline]
+    pub fn len_utf8(self) -> usize {
+        char::from_u32(self.0).map(|c| c.len_utf8()).unwrap_or(0)
+    }
+
+    /// Returns the simple case folding of this character.
+    ///
+    /// If the character is absent, then absence is returned.
+    pub fn case_fold(self) -> Char {
+        char::from_u32(self.0).map(syntax::simple_case_fold).into()
+    }
+
+    /// Returns true iff the character is a word character.
+    ///
+    /// If the character is absent, then false is returned.
+    pub fn is_word_char(self) -> bool {
+        char::from_u32(self.0).map(syntax::is_word_char).unwrap_or(false)
+    }
+
+    /// Converts the character to a real primitive `char`.
+    ///
+    /// If the character is absent, then `None` is returned.
+    pub fn as_char(self) -> Option<char> {
+        // This is only used in the `regex!` macro because it expands char
+        // classes into `match` expressions (instead of binary search).
+        char::from_u32(self.0)
+    }
+}
+
+impl From<char> for Char { // a real `char` is stored as its scalar value
+    fn from(c: char) -> Char { Char(c as u32) }
+}
+
+impl From<Option<char>> for Char {
+    fn from(c: Option<char>) -> Char {
+        c.map(|c| c.into()).unwrap_or(Char(u32::MAX)) // `None` => absent value
+    }
+}
+
+impl PartialEq<char> for Char { // an absent `Char` (`u32::MAX`) equals no `char`
+    #[inline]
+    fn eq(&self, other: &char) -> bool { self.0 == *other as u32 }
+}
+
+impl PartialEq<Char> for char { // mirror of `Char == char`; absent never matches
+    #[inline]
+    fn eq(&self, other: &Char) -> bool { *self as u32 == other.0 }
+}
+
+impl PartialOrd<char> for Char { // an absent `Char` orders after every `char`
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        self.0.partial_cmp(&(*other as u32)) // compares raw `u32` values
+    }
+}
+
+impl PartialOrd<Char> for char { // every `char` orders before an absent `Char`
+    #[inline]
+    fn partial_cmp(&self, other: &Char) -> Option<Ordering> {
+        (*self as u32).partial_cmp(&other.0) // compares raw `u32` values
+    }
+}
diff --git a/src/compile.rs b/src/compile.rs
index 413da3e9e8..012328c6a3 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -1,4 +1,4 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -8,149 +8,87 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// Enable this to squash warnings due to exporting pieces of the representation
-// for use with the regex! macro. See lib.rs for explanation.
-
-use self::Inst::*;
-
-use std::cmp;
 use syntax::{self, Expr, Repeater};
-use Error;
 
-pub type InstIdx = usize;
+use Error;
+use program::{CharRanges, Inst, InstIdx, OneChar};
 
-/// An instruction, the underlying unit of a compiled regular expression
-#[allow(missing_docs)]
-#[derive(Debug, Clone)]
-pub enum Inst {
-    /// When a Match instruction is executed, the current thread is successful.
-    Match,
-    OneChar { c: char, casei: bool },
-    CharClass(syntax::CharClass),
-    Any,
-    AnyNoNL,
-    StartLine,
-    EndLine,
-    StartText,
-    EndText,
-    WordBoundary,
-    NotWordBoundary,
-    Save(usize),
-    Jump(InstIdx),
-    Split(InstIdx, InstIdx),
-}
+type Compiled = (Vec<Inst>, Vec<Option<String>>);
 
-/// Program represents a compiled regular expression. Once an expression is
-/// compiled, its representation is immutable and will never change.
+/// A regex compiler.
 ///
-/// All of the data in a compiled expression is wrapped in "MaybeStatic" or
-/// "MaybeOwned" types so that a `Program` can be represented as static data.
-/// (This makes it convenient and efficient for use with the `regex!` macro.)
-#[derive(Clone, Debug)]
-pub struct Program {
-    /// A sequence of instructions.
-    pub insts: Vec<Inst>,
-    /// If the regular expression requires a literal prefix in order to have a
-    /// match, that prefix is stored here. (It's used in the VM to implement
-    /// an optimization.)
-    pub prefix: String,
+/// A regex compiler is responsible for turning a regex's AST into a sequence
+/// of instructions.
+pub struct Compiler {
+    size_limit: usize,
+    insts: Vec<Inst>,
+    cap_names: Vec<Option<String>>,
 }
 
-impl Program {
-    /// Compiles a Regex given its AST.
-    pub fn new(ast: Expr, size: usize) -> Result<(Program, Vec<Option<String>>), Error> {
-        let mut c = Compiler {
-            insts: Vec::with_capacity(100),
-            names: vec![None],
-            size_limit: size,
-        };
-
-        c.insts.push(Save(0));
-        try!(c.compile(ast));
-        c.insts.push(Save(1));
-        c.insts.push(Match);
-
-        // Try to discover a literal string prefix.
-        // This is a bit hacky since we have to skip over the initial
-        // 'Save' instruction.
-        let mut pre = String::with_capacity(5);
-        for inst in c.insts[1..].iter() {
-            match *inst {
-                OneChar { c, casei: false } => pre.push(c),
-                _ => break
-            }
+impl Compiler {
+    /// Creates a new compiler that limits the size of the regex program
+    /// to the size given (in bytes).
+    pub fn new(size_limit: usize) -> Compiler {
+        Compiler {
+            size_limit: size_limit,
+            insts: vec![],
+            cap_names: vec![None],
         }
-
-        let Compiler { insts, names, .. } = c;
-        let prog = Program {
-            insts: insts,
-            prefix: pre,
-        };
-        Ok((prog, names))
     }
 
-    /// Returns the total number of capture groups in the regular expression.
-    /// This includes the zeroth capture.
-    pub fn num_captures(&self) -> usize {
-        let mut n = 0;
-        for inst in self.insts.iter() {
-            match *inst {
-                Save(c) => n = cmp::max(n, c+1),
-                _ => {}
-            }
-        }
-        // There's exactly 2 Save slots for every capture.
-        n / 2
+    /// Compiles the given regex AST into a tuple of a sequence of
+    /// instructions and a sequence of capture groups, optionally named.
+    pub fn compile(mut self, ast: Expr) -> Result<Compiled, Error> {
+        self.insts.push(Inst::Save(0));
+        try!(self.c(ast));
+        self.insts.push(Inst::Save(1));
+        self.insts.push(Inst::Match);
+        Ok((self.insts, self.cap_names))
     }
-}
 
-struct Compiler {
-    insts: Vec<Inst>,
-    names: Vec<Option<String>>,
-    size_limit: usize,
-}
-
-// The compiler implemented here is extremely simple. Most of the complexity
-// in this crate is in the parser or the VM.
-// The only tricky thing here is patching jump/split instructions to point to
-// the right instruction.
-impl Compiler {
-    fn check_size(&self) -> Result<(), Error> {
-        if self.insts.len() * ::std::mem::size_of::<Inst>() > self.size_limit {
-            Err(Error::CompiledTooBig(self.size_limit))
-        } else {
-            Ok(())
-        }
-    }
+    fn c(&mut self, ast: Expr) -> Result<(), Error> {
+        use program::Inst::*;
+        use program::LookInst::*;
 
-    fn compile(&mut self, ast: Expr) -> Result<(), Error> {
         match ast {
             Expr::Empty => {},
             Expr::Literal { chars, casei } => {
-                for c in chars {
-                    self.push(OneChar { c: c, casei: casei });
+                for mut c in chars {
+                    if casei {
+                        c = syntax::simple_case_fold(c);
+                    }
+                    self.push(Char(OneChar { c: c, casei: casei }));
+                }
+            }
+            Expr::AnyChar => self.push(Ranges(CharRanges::any())),
+            Expr::AnyCharNoNL => self.push(Ranges(CharRanges::any_nonl())),
+            Expr::Class(cls) => {
+                if cls.len() == 1 && cls[0].start == cls[0].end {
+                    self.push(Char(OneChar {
+                        c: cls[0].start,
+                        casei: cls.is_case_insensitive(),
+                    }));
+                } else {
+                    self.push(Ranges(CharRanges::from_class(cls)));
                 }
             }
-            Expr::AnyChar => self.push(Any),
-            Expr::AnyCharNoNL => self.push(AnyNoNL),
-            Expr::Class(cls) => self.push(CharClass(cls)),
-            Expr::StartLine => self.push(StartLine),
-            Expr::EndLine => self.push(EndLine),
-            Expr::StartText => self.push(StartText),
-            Expr::EndText => self.push(EndText),
-            Expr::WordBoundary => self.push(WordBoundary),
-            Expr::NotWordBoundary => self.push(NotWordBoundary),
-            Expr::Group { e, i: None, name: None } => try!(self.compile(*e)),
+            Expr::StartLine => self.push(EmptyLook(StartLine)),
+            Expr::EndLine => self.push(EmptyLook(EndLine)),
+            Expr::StartText => self.push(EmptyLook(StartText)),
+            Expr::EndText => self.push(EmptyLook(EndText)),
+            Expr::WordBoundary => self.push(EmptyLook(WordBoundary)),
+            Expr::NotWordBoundary => self.push(EmptyLook(NotWordBoundary)),
+            Expr::Group { e, i: None, name: None } => try!(self.c(*e)),
             Expr::Group { e, i, name } => {
                 let i = i.expect("capture index");
-                self.names.push(name);
+                self.cap_names.push(name);
                 self.push(Save(2 * i));
-                try!(self.compile(*e));
+                try!(self.c(*e));
                 self.push(Save(2 * i + 1));
             }
             Expr::Concat(es) => {
                 for e in es {
-                    try!(self.compile(e));
+                    try!(self.c(e));
                 }
             }
             Expr::Alternate(mut es) => {
@@ -160,26 +98,26 @@ impl Compiler {
                 }
                 let e1 = es.remove(0);
                 if es.len() == 0 {
-                    try!(self.compile(e1));
+                    try!(self.c(e1));
                     return Ok(());
                 }
                 let e2 = Expr::Alternate(es); // this causes recursion
 
-                let split = self.empty_split(); // push: split 0, 0
+                let split = self.empty_split();
                 let j1 = self.insts.len();
-                try!(self.compile(e1));                // push: insts for x
-                let jmp = self.empty_jump();    // push: jmp 0
+                try!(self.c(e1));
+                let jmp = self.empty_jump();
                 let j2 = self.insts.len();
-                try!(self.compile(e2));                // push: insts for y
+                try!(self.c(e2));
                 let j3 = self.insts.len();
 
-                self.set_split(split, j1, j2);  // split 0, 0 -> split j1, j2
-                self.set_jump(jmp, j3);         // jmp 0      -> jmp j3
+                self.set_split(split, j1, j2);
+                self.set_jump(jmp, j3);
             }
             Expr::Repeat { e, r: Repeater::ZeroOrOne, greedy } => {
                 let split = self.empty_split();
                 let j1 = self.insts.len();
-                try!(self.compile(*e));
+                try!(self.c(*e));
                 let j2 = self.insts.len();
 
                 if greedy {
@@ -192,7 +130,7 @@ impl Compiler {
                 let j1 = self.insts.len();
                 let split = self.empty_split();
                 let j2 = self.insts.len();
-                try!(self.compile(*e));
+                try!(self.c(*e));
                 let jmp = self.empty_jump();
                 let j3 = self.insts.len();
 
@@ -205,7 +143,7 @@ impl Compiler {
             }
             Expr::Repeat { e, r: Repeater::OneOrMore, greedy } => {
                 let j1 = self.insts.len();
-                try!(self.compile(*e));
+                try!(self.c(*e));
                 let split = self.empty_split();
                 let j2 = self.insts.len();
 
@@ -215,24 +153,32 @@ impl Compiler {
                     self.set_split(split, j2, j1);
                 }
             }
-            Expr::Repeat { e, r: Repeater::Range { min, max: None }, greedy } => {
+            Expr::Repeat {
+                e,
+                r: Repeater::Range { min, max: None },
+                greedy,
+            } => {
                 let e = *e;
                 for _ in 0..min {
-                    try!(self.compile(e.clone()));
+                    try!(self.c(e.clone()));
                 }
-                try!(self.compile(Expr::Repeat {
+                try!(self.c(Expr::Repeat {
                     e: Box::new(e),
                     r: Repeater::ZeroOrMore,
                     greedy: greedy,
                 }));
             }
-            Expr::Repeat { e, r: Repeater::Range { min, max: Some(max) }, greedy } => {
+            Expr::Repeat {
+                e,
+                r: Repeater::Range { min, max: Some(max) },
+                greedy,
+            } => {
                 let e = *e;
                 for _ in 0..min {
-                    try!(self.compile(e.clone()));
+                    try!(self.c(e.clone()));
                 }
                 for _ in min..max {
-                    try!(self.compile(Expr::Repeat {
+                    try!(self.c(Expr::Repeat {
                         e: Box::new(e.clone()),
                         r: Repeater::ZeroOrOne,
                         greedy: greedy,
@@ -243,6 +189,16 @@ impl Compiler {
         self.check_size()
     }
 
+    fn check_size(&self) -> Result<(), Error> { // Err once past `size_limit`
+        use std::mem::size_of;
+
+        if self.insts.len() * size_of::<Inst>() > self.size_limit { // in-line bytes
+            Err(Error::CompiledTooBig(self.size_limit))
+        } else {
+            Ok(())
+        }
+    }
+
     /// Appends the given instruction to the program.
     #[inline]
     fn push(&mut self, x: Inst) {
@@ -254,7 +210,7 @@ impl Compiler {
     /// the actual locations of the split in later.)
     #[inline]
     fn empty_split(&mut self) -> InstIdx {
-        self.insts.push(Split(0, 0));
+        self.insts.push(Inst::Split(0, 0));
         self.insts.len() - 1
     }
 
@@ -266,7 +222,7 @@ impl Compiler {
     fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
         let split = &mut self.insts[i];
         match *split {
-            Split(_, _) => *split = Split(pc1, pc2),
+            Inst::Split(_, _) => *split = Inst::Split(pc1, pc2),
             _ => panic!("BUG: Invalid split index."),
         }
     }
@@ -275,7 +231,7 @@ impl Compiler {
     /// index of that instruction.
     #[inline]
     fn empty_jump(&mut self) -> InstIdx {
-        self.insts.push(Jump(0));
+        self.insts.push(Inst::Jump(0));
         self.insts.len() - 1
     }
 
@@ -286,7 +242,7 @@ impl Compiler {
     fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
         let jmp = &mut self.insts[i];
         match *jmp {
-            Jump(_) => *jmp = Jump(pc),
+            Inst::Jump(_) => *jmp = Inst::Jump(pc),
             _ => panic!("BUG: Invalid jump index."),
         }
     }
diff --git a/src/input.rs b/src/input.rs
new file mode 100644
index 0000000000..446872bb46
--- /dev/null
+++ b/src/input.rs
@@ -0,0 +1,114 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::ops;
+
+use char::Char;
+use prefix::Prefix;
+
+/// Represents a location in the input.
+#[derive(Clone, Copy, Debug)]
+pub struct InputAt {
+    pos: usize, // byte offset of this location
+    c: Char, // character at `pos`; absent just before or after the input
+    len: usize, // UTF-8 width of `c` in bytes
+}
+
+impl InputAt {
+    /// Returns true iff this position is at byte offset 0 of the input.
+    pub fn is_beginning(&self) -> bool {
+        self.pos == 0
+    }
+
+    /// Returns the character at this position.
+    ///
+    /// If this position is just before or after the input, then an absent
+    /// character is returned.
+    pub fn char(&self) -> Char {
+        self.c
+    }
+
+    /// Returns the UTF-8 width, in bytes, of the character at this position.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns the byte offset of this position.
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    /// Returns the byte offset of the next position (`pos` plus `len`).
+    pub fn next_pos(&self) -> usize {
+        self.pos + self.len
+    }
+}
+
+/// An abstraction over input used in the matching engines.
+pub trait Input {
+    /// Return an encoding of the position at byte offset `i`.
+    fn at(&self, i: usize) -> InputAt;
+    /// Return an encoding of the char position just prior to byte offset `i`.
+    fn previous_at(&self, i: usize) -> InputAt;
+    /// Scan the input from `at` for a matching prefix; `None` if not found.
+    fn prefix_at(&self, prefixes: &Prefix, at: InputAt) -> Option<InputAt>;
+}
+
+/// An input reader over the characters of a borrowed string slice.
+///
+/// (This is the only implementation of `Input` at the moment.)
+#[derive(Debug)]
+pub struct CharInput<'t>(&'t str);
+
+impl<'t> CharInput<'t> {
+    /// Return a new character input reader that borrows the given string.
+    pub fn new(s: &'t str) -> CharInput<'t> {
+        CharInput(s)
+    }
+}
+
+impl<'t> ops::Deref for CharInput<'t> {
+    type Target = str; // lets a `CharInput` be sliced like the `str` it wraps
+
+    fn deref(&self) -> &str {
+        self.0
+    }
+}
+
+impl<'t> Input for CharInput<'t> {
+    // This `inline(always)` increases throughput by almost 25% on the `hard`
+    // benchmarks over a normal `inline` annotation.
+    //
+    // I'm not sure why `#[inline]` isn't enough to convince LLVM, but it is
+    // used *a lot* in the guts of the matching engines.
+    #[inline(always)]
+    fn at(&self, i: usize) -> InputAt {
+        let c = self[i..].chars().next().into(); // absent when the slice is empty
+        InputAt {
+            pos: i,
+            c: c,
+            len: c.len_utf8(),
+        }
+    }
+
+    fn previous_at(&self, i: usize) -> InputAt {
+        let c: Char = self[..i].chars().rev().next().into(); // absent when i == 0
+        let len = c.len_utf8();
+        InputAt {
+            pos: i - len, // start of the previous char; stays `i` when absent
+            c: c,
+            len: len,
+        }
+    }
+
+    fn prefix_at(&self, prefixes: &Prefix, at: InputAt) -> Option<InputAt> {
+        // `s` is used as an offset into the slice, hence `at.pos() + s` below.
+        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index d63c98dad4..9de4c99e84 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -66,8 +66,10 @@
 //! compiles*. Said differently, if you only use `regex!` to build regular
 //! expressions in your program, then your program cannot compile with an
 //! invalid regular expression. Moreover, the `regex!` macro compiles the
-//! given expression to native Rust code, which makes it much faster for
-//! searching text.
+//! given expression to native Rust code, which ideally makes it faster.
+//! Unfortunately (or fortunately), the dynamic implementation has had a lot
+//! more optimization work put into it so far, so it is currently faster
+//! than the `regex!` macro in most cases.
 //!
 //! To use the `regex!` macro, you must enable the `plugin` feature and import
 //! the `regex_macros` crate as a syntax extension:
@@ -88,7 +90,7 @@
 //! Secondly, the `regex` crate *must* be linked with the name `regex` since
 //! the generated code depends on finding symbols in the `regex` crate.
 //!
-//! The only downside of using the `regex!` macro is that it can increase the
+//! One downside of using the `regex!` macro is that it can increase the
 //! size of your program's binary since it generates specialized Rust code.
 //! The extra size probably won't be significant for a small number of
 //! expressions, but 100+ calls to `regex!` will probably result in a
@@ -394,6 +396,8 @@
        html_favicon_url = "http://www.rust-lang.org/favicon.ico",
        html_root_url = "http://doc.rust-lang.org/regex/")]
 
+extern crate aho_corasick;
+extern crate memchr;
 extern crate regex_syntax as syntax;
 
 pub use re::{
@@ -403,36 +407,25 @@ pub use re::{
     quote, is_match,
 };
 
+mod backtrack;
+mod char;
 mod compile;
+mod input;
+mod pool;
+mod prefix;
+mod program;
+mod nfa;
 mod re;
-mod vm;
 
-/// The `native` module exists to support the `regex!` macro. Do not use.
+/// The `internal` module exists to support the `regex!` macro and other
+/// suspicious activity, such as testing different matching engines.
 #[doc(hidden)]
-pub mod native {
-    // Exporting this stuff is bad form, but it's necessary for two reasons.
-    // Firstly, the `regex!` syntax extension is in a different crate and
-    // requires access to the representation of a regex (particularly the
-    // instruction set) in order to compile to native Rust. This could be
-    // mitigated if `regex!` was defined in the same crate, but this has
-    // undesirable consequences (such as requiring a dependency on
-    // `libsyntax`).
-    //
-    // Secondly, the code generated by `regex!` must *also* be able
-    // to access various functions in this crate to reduce code duplication
-    // and to provide a value with precisely the same `Regex` type in this
-    // crate. This, AFAIK, is impossible to mitigate.
-    //
-    // On the bright side, `rustdoc` lets us hide this from the public API
-    // documentation.
-    pub use compile::Program;
-    pub use compile::Inst;
-    pub use syntax::simple_case_fold;
-    pub use re::{ExDynamic, ExNative};
-    pub use re::Regex::{Dynamic, Native};
-    pub use vm::{CharReader, find_prefix};
-    pub use vm::MatchKind::{self, Exists, Location, Submatches};
-    pub use vm::StepState::{
-        self, StepMatchEarlyReturn, StepMatch, StepContinue,
+pub mod internal {
+    pub use char::Char;
+    pub use input::{Input, CharInput, InputAt};
+    pub use program::{
+        Program, MatchEngine, CharRanges, Inst, LookInst, OneChar,
     };
+    pub use re::ExNative;
+    pub use re::Regex::{Dynamic, Native};
 }
diff --git a/src/nfa.rs b/src/nfa.rs
new file mode 100644
index 0000000000..408a150aec
--- /dev/null
+++ b/src/nfa.rs
@@ -0,0 +1,300 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
+// VM that simulates a DFA.
+//
+// According to Russ Cox[1], a DFA performs better than an NFA, principally
+// because it reuses states previously computed by the machine *and* doesn't
+// keep track of capture groups. The drawback of a DFA (aside from its
+// complexity) is that it can't accurately return the locations of submatches.
+// The NFA *can* do that. (This is my understanding anyway.)
+//
+// Cox suggests that a DFA ought to be used to answer "does this match" and
+// "where does it match" questions. (In the latter, the starting position of
+// the match is computed by executing the regex backwards.) Cox also suggests
+// that a DFA should be run when asking "where are the submatches", which can
+// 1) quickly answer "no" if there's no match and 2) discover the substring
+// that matches, which means running the NFA on smaller input.
+//
+// Currently, the NFA simulation implemented below does some dirty tricks to
+// avoid tracking capture groups when they aren't needed (which only works
+// for 'is_match', not 'find'). This is a half-measure, but does provide some
+// perf improvement.
+//
+// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
+//
+// UPDATE: We now have a backtracking matching engine and a DFA for prefix
+// matching. The prefix DFA is used in both the NFA simulation below and the
+// backtracking engine to skip along the input quickly.
+//
+// [1] - http://swtch.com/~rsc/regex/regex3.html
+
+use input::{Input, InputAt, CharInput};
+use program::Program;
+use re::CaptureIdxs;
+
+/// An NFA simulation matching engine: a borrowed program plus the input text.
+#[derive(Debug)]
+pub struct Nfa<'r, 't> {
+    prog: &'r Program,
+    input: CharInput<'t>,
+}
+
+impl<'r, 't> Nfa<'r, 't> {
+    /// Execute the NFA matching engine on `text` from byte offset `start`.
+    ///
+    /// If there's a match, `exec` returns `true` and populates the given
+    /// captures accordingly.
+    pub fn exec(
+        prog: &'r Program,
+        mut caps: &mut CaptureIdxs,
+        text: &'t str,
+        start: usize,
+    ) -> bool {
+        let mut q = prog.nfa_threads.get(); // take thread lists from the program
+        let input = CharInput::new(text);
+        let at = input.at(start);
+        let matched = Nfa {
+            prog: prog,
+            input: input,
+        }.exec_(&mut q, &mut caps, at);
+        prog.nfa_threads.put(q); // hand the thread lists back to the program
+        matched
+    }
+
+    fn exec_(
+        &mut self,
+        mut q: &mut NfaThreads,
+        mut caps: &mut CaptureIdxs,
+        mut at: InputAt,
+    ) -> bool {
+        let mut matched = false;
+        q.clist.empty(); q.nlist.empty();
+'LOOP:  loop {
+            if q.clist.size == 0 {
+                // Three ways to bail out when our current set of threads is
+                // empty.
+                //
+                // 1. We have a match---so we're done exploring any possible
+                //    alternatives.  Time to quit.
+                //
+                // 2. If the expression starts with a '^' we can terminate as
+                //    soon as the last thread dies.
+                if matched
+                   || (!at.is_beginning() && self.prog.anchored_begin) {
+                    break;
+                }
+
+                // 3. If there's a literal prefix for the program, try to
+                //    jump ahead quickly. If it can't be found, then we can
+                //    bail out early.
+                if !self.prog.prefixes.is_empty() {
+                    at = match self.input.prefix_at(&self.prog.prefixes, at) {
+                        None => break,
+                        Some(at) => at,
+                    };
+                }
+            }
+
+            // This simulates a preceding '.*?' for every regex by adding
+            // a state starting at the current position in the input for the
+            // beginning of the program only if we don't already have a match.
+            if q.clist.size == 0 || (!self.prog.anchored_begin && !matched) {
+                self.add(&mut q.clist, &mut caps, 0, at)
+            }
+            // The previous call to "add" actually inspects the position just
+            // before the current character. For stepping through the machine,
+            // we want to look at the current character, so we advance the
+            // input.
+            let at_next = self.input.at(at.next_pos());
+            for i in 0..q.clist.size {
+                let pc = q.clist.pc(i);
+                let tcaps = q.clist.caps(i);
+                if self.step(&mut q.nlist, caps, tcaps, pc, at, at_next) {
+                    matched = true;
+                    if caps.len() == 0 {
+                        // If we only care if a match occurs (not its
+                        // position), then we can quit right now.
+                        break 'LOOP;
+                    }
+                    // We don't need to check the rest of the threads in this
+                    // set because we've matched something ("leftmost-first").
+                    // However, we still need to check threads in the next set
+                    // to support things like greedy matching.
+                    break;
+                }
+            }
+            if at.char().is_none() {
+                break;
+            }
+            at = at_next;
+            q.swap();
+            q.nlist.empty();
+        }
+        matched
+    }
+
+    fn step(
+        &self,
+        nlist: &mut Threads,
+        caps: &mut [Option<usize>],
+        thread_caps: &mut [Option<usize>],
+        pc: usize,
+        at: InputAt,
+        at_next: InputAt,
+    ) -> bool { // returns true only when `pc` is a `Match` instruction
+        use program::Inst::*;
+        match self.prog.insts[pc] {
+            Match => {
+                // Copy this thread's capture slots out to the caller's slots.
+                for (slot, val) in caps.iter_mut().zip(thread_caps.iter()) {
+                    *slot = *val;
+                }
+                true
+            }
+            Char(ref inst) => {
+                if inst.matches(at.char()) {
+                    self.add(nlist, thread_caps, pc+1, at_next); // consume one char
+                }
+                false
+            }
+            Ranges(ref inst) => {
+                if inst.matches(at.char()).is_some() {
+                    self.add(nlist, thread_caps, pc+1, at_next); // consume one char
+                }
+                false
+            }
+            EmptyLook(_) | Save(_) | Jump(_) | Split(_, _) => false, // done in `add`
+        }
+    }
+
+    fn add(
+        &self,
+        nlist: &mut Threads,
+        thread_caps: &mut [Option<usize>],
+        pc: usize,
+        at: InputAt,
+    ) {
+        use program::Inst::*;
+
+        if nlist.contains(pc) { // each `pc` enters a given list at most once
+            return
+        }
+        let ti = nlist.add(pc);
+        match self.prog.insts[pc] {
+            EmptyLook(ref inst) => {
+                let prev = self.input.previous_at(at.pos());
+                if inst.matches(prev.char(), at.char()) {
+                    self.add(nlist, thread_caps, pc+1, at);
+                }
+            }
+            Save(slot) => {
+                if slot >= thread_caps.len() { // slot is not tracked: just go on
+                    self.add(nlist, thread_caps, pc+1, at);
+                } else {
+                    let old = thread_caps[slot];
+                    thread_caps[slot] = Some(at.pos());
+                    self.add(nlist, thread_caps, pc+1, at);
+                    thread_caps[slot] = old; // undo so later branches see `old`
+                }
+            }
+            Jump(to) => {
+                self.add(nlist, thread_caps, to, at)
+            }
+            Split(x, y) => { // `x` is followed first, so it is listed before `y`
+                self.add(nlist, thread_caps, x, at);
+                self.add(nlist, thread_caps, y, at);
+            }
+            Match | Char(_) | Ranges(_) => {
+                // Snapshot the current capture slots into the stored thread.
+                let mut t = &mut nlist.thread(ti);
+                for (slot, val) in t.caps.iter_mut().zip(thread_caps.iter()) {
+                    *slot = *val;
+                }
+            }
+        }
+    }
+}
+
+/// Shared cached state between multiple invocations of a NFA engine
+/// in the same thread.
+///
+/// It is exported so that it can be cached by `program::Program`.
+#[derive(Debug)]
+pub struct NfaThreads {
+    clist: Threads, // threads stepped at the current input position
+    nlist: Threads, // threads collected for the next input position
+}
+
+#[derive(Debug)]
+struct Threads {
+    dense: Vec<Thread>, // threads in insertion order; the first `size` are live
+    sparse: Vec<usize>, // indexed by pc: slot in `dense` (entries may be stale)
+    size: usize, // number of live threads
+}
+
+#[derive(Clone, Debug)]
+struct Thread {
+    pc: usize, // index of the instruction this thread is at
+    caps: Vec<Option<usize>>, // capture slot values recorded for this thread
+}
+
+impl NfaThreads {
+    /// Create empty NFA state for `num_insts` instructions and `ncaps` groups.
+    pub fn new(num_insts: usize, ncaps: usize) -> NfaThreads {
+        NfaThreads {
+            clist: Threads::new(num_insts, ncaps),
+            nlist: Threads::new(num_insts, ncaps),
+        }
+    }
+
+    fn swap(&mut self) { // exchange the current and next thread lists
+        ::std::mem::swap(&mut self.clist, &mut self.nlist);
+    }
+}
+
+impl Threads {
+    fn new(num_insts: usize, ncaps: usize) -> Threads {
+        let t = Thread { pc: 0, caps: vec![None; ncaps * 2] }; // 2 slots per group
+        Threads {
+            dense: vec![t; num_insts], // one preallocated thread per instruction
+            sparse: vec![0; num_insts],
+            size: 0,
+        }
+    }
+
+    fn add(&mut self, pc: usize) -> usize { // returns the new thread's index
+        let i = self.size;
+        self.dense[i].pc = pc;
+        self.sparse[pc] = i;
+        self.size += 1;
+        i
+    }
+
+    fn thread(&mut self, i: usize) -> &mut Thread {
+        &mut self.dense[i]
+    }
+
+    fn contains(&self, pc: usize) -> bool {
+        let s = self.sparse[pc]; // may be stale, so it is checked against `dense`
+        s < self.size && self.dense[s].pc == pc
+    }
+
+    fn empty(&mut self) { // forget all threads without touching the storage
+        self.size = 0;
+    }
+
+    fn pc(&self, i: usize) -> usize {
+        self.dense[i].pc
+    }
+
+    fn caps(&mut self, i: usize) -> &mut [Option<usize>] {
+        &mut self.dense[i].caps
+    }
+}
diff --git a/src/pool.rs b/src/pool.rs
new file mode 100644
index 0000000000..cb29fb0346
--- /dev/null
+++ b/src/pool.rs
@@ -0,0 +1,93 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cell::RefCell;
+use std::fmt;
+use std::sync::Mutex;
+
+/// A very simple memory pool for managing cached state.
+///
+/// This was motivated by a singular purpose: reduce the allocation overhead
+/// of matching engines.
+///
+/// Without a pool, the matching engines need to allocate state each time they
+/// are invoked. If a regex is used once to check for a match and never again,
+/// then this is OK. But if a regex is used many times over, then not
+/// re-allocating the engine's state is a huge win. (A regex is commonly
+/// used many times, for example, with `find_iter`, `captures_iter` or
+/// `replace_all`.)
+///
+/// We use inherited mutability and ensure that each thread gets its own
+/// state. There is no limit on the number of states that are created. If a
+/// thread requests one and one isn't available, a new one is created.
+///
+/// (N.B. It seems like there exists a way to implement this with stronger
+/// guarantees, e.g., with a guard of some sort that puts the resource back
+/// in the pool when it is dropped. However, the use case for this pool is so
+/// simple and localized that it doesn't seem worth it.)
+pub struct Pool<T> {
+    stack: Mutex<RefCell<Vec<T>>>, // idle resources; `get` pops, `put` pushes
+    create: CreateFn<T>, // called by `get` when `stack` is empty
+}
+
+/// The type of the function used to create a new resource when none is pooled.
+pub type CreateFn<T> = Box<Fn() -> T + Send + Sync>;
+
+impl<T> Pool<T> {
+    /// Create a new pool.
+    ///
+    /// When a caller requests a resource from the pool and one does not
+    /// exist, then `create` is called to allocate a new resource for the
+    /// caller.
+    ///
+    /// It is up to the caller to put the resource back into the pool for
+    /// future reuse.
+    ///
+    /// All resources are created lazily/on-demand.
+    pub fn new(create: CreateFn<T>) -> Pool<T> {
+        Pool {
+            stack: Mutex::new(RefCell::new(vec![])),
+            create: create,
+        }
+    }
+
+    /// Request a resource from the pool.
+    ///
+    /// If no resources are available, a new one is created.
+    ///
+    /// The caller must return the resource to the pool, otherwise the pool
+    /// will not be able to reuse the resource.
+    pub fn get(&self) -> T {
+        let stack = self.stack.lock();
+        let stack = stack.unwrap(); // panics if the mutex was poisoned
+        let mut stack = stack.borrow_mut();
+        match stack.pop() {
+            None => (self.create)(), // runs while the lock is still held
+            Some(v) => v,
+        }
+    }
+
+    /// Add a resource to the pool.
+    ///
+    /// This makes the resource available for use with `get`.
+    pub fn put(&self, v: T) {
+        let stack = self.stack.lock();
+        let stack = stack.unwrap(); // panics if the mutex was poisoned
+        stack.borrow_mut().push(v);
+    }
+}
+
+impl<T: fmt::Debug> fmt::Debug for Pool<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let stack = self.stack.lock(); // formatting takes the pool lock
+        let stack = stack.unwrap(); // panics if the mutex was poisoned
+        stack.fmt(f) // only the pooled resources are shown, not `create`
+    }
+}
diff --git a/src/prefix.rs b/src/prefix.rs
new file mode 100644
index 0000000000..d11ddcf5a1
--- /dev/null
+++ b/src/prefix.rs
@@ -0,0 +1,106 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use aho_corasick::AcAutomaton;
+use memchr::memchr;
+
+/// A prefix extracted from a compiled regular expression.
+///
+/// A regex prefix is a set of literal strings that *must* be matched at the
+/// beginning of a regex in order for the entire regex to match.
+///
+/// There are a variety of ways to efficiently scan the search text for a
+/// prefix. Currently, there are three implemented:
+///
+/// 1. The prefix is a single byte. Just use memchr.
+/// 2. If the prefix is a set of two or more single byte prefixes, then
+///    a 256-entry lookup table is created. Checking if there is a match is a
+///    lookup in this table for each byte in the search text.
+/// 3. In all other cases, build an Aho-Corasick automaton.
+///
+/// It's possible that there's room here for other substring algorithms,
+/// such as Boyer-Moore for single-set prefixes greater than 1, or Rabin-Karp
+/// for small sets of same-length prefixes.
+#[derive(Clone, Debug)]
+pub enum Prefix {
+    /// No prefixes. (Never advances through the input.)
+    Empty,
+    /// A single byte prefix.
+    Single(u8),
+    /// A set of two or more single byte prefixes, indexed by byte value.
+    /// This could be reduced to a bitset, which would use only 32 bytes,
+    /// but I don't think we care.
+    Singles(Vec<bool>),
+    /// A full Aho-Corasick DFA automaton.
+    Automaton(AcAutomaton),
+}
+
+impl Prefix {
+    /// Create a new prefix matching machine from the literals in `pfxs`.
+    pub fn new(pfxs: Vec<String>) -> Prefix {
+        if pfxs.len() == 0 || pfxs[0].len() == 0 {
+            Prefix::Empty
+        } else if pfxs.len() == 1 && pfxs[0].len() == 1 {
+            Prefix::Single(pfxs[0].as_bytes()[0])
+        } else if pfxs.len() >= 2 && pfxs.iter().all(|s| s.len() == 1) {
+            let mut set = vec![false; 256]; // one entry per possible byte value
+            for p in pfxs {
+                set[p.as_bytes()[0] as usize] = true;
+            }
+            Prefix::Singles(set)
+        } else {
+            Prefix::Automaton(AcAutomaton::new(pfxs))
+        }
+    }
+
+    /// Find the position of a prefix in `haystack` if it exists.
+    ///
+    /// In the matching engines, we only actually need the starting index
+    /// because the prefix is used to only skip ahead---the matching engine
+    /// still needs to run over the prefix input. However, we return the ending
+    /// location as well in case the prefix corresponds to the entire regex,
+    /// in which case, you need the end of the match.
+    pub fn find(&self, haystack: &str) -> Option<(usize, usize)> {
+        use self::Prefix::*;
+        match *self {
+            Empty => Some((0, 0)),
+            Single(b) => memchr(b, haystack.as_bytes()).map(|i| (i, i+1)),
+            Singles(ref pats) => find_singles(pats, haystack.as_bytes()),
+            Automaton(ref aut) => {
+                aut.find(haystack).next().map(|m| (m.start, m.end))
+            }
+        }
+    }
+
+    /// Returns true iff this prefix is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns the number of prefixes (for `Singles`, the count of set bytes).
+    pub fn len(&self) -> usize {
+        match *self {
+            Prefix::Empty => 0,
+            Prefix::Single(_) => 1,
+            Prefix::Singles(ref pats) => pats.iter().filter(|&&b| b).count(),
+            Prefix::Automaton(ref aut) => aut.len(),
+        }
+    }
+}
+
+/// A very quick scan for multiple single byte prefixes using a lookup table.
+fn find_singles(pats: &[bool], haystack: &[u8]) -> Option<(usize, usize)> {
+    for (hi, &b) in haystack.iter().enumerate() {
+        if pats[b as usize] { // `pats` is indexed by byte value
+            return Some((hi, hi+1)); // first hit wins; the match is one byte wide
+        }
+    }
+    None
+}
diff --git a/src/program.rs b/src/program.rs
new file mode 100644
index 0000000000..6986667a92
--- /dev/null
+++ b/src/program.rs
@@ -0,0 +1,492 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cmp::{self, Ordering};
+
+use syntax;
+
+use Error;
+use backtrack::{Backtrack, BackMachine};
+use char::Char;
+use compile::Compiler;
+use nfa::{Nfa, NfaThreads};
+use pool::Pool;
+use prefix::Prefix;
+use re::CaptureIdxs;
+
+const NUM_PREFIX_LIMIT: usize = 30; // prefix extraction gives up above this many
+const PREFIX_LENGTH_LIMIT: usize = 15; // prefixes stop growing past this length
+
+pub type InstIdx = usize; // index into `Program::insts`
+
+/// An instruction, the underlying unit of a compiled regular expression.
+#[derive(Clone, Debug)]
+pub enum Inst {
+    /// A match has occurred.
+    /// This is always the last instruction and only occurs in a single spot.
+    /// We could special case this in the code, but it is much clearer to
+    /// handle it as a proper instruction.
+    Match,
+    /// Save the current location in the input into the given capture slot.
+    Save(usize),
+    /// Jump to the instruction given.
+    Jump(InstIdx),
+    /// Match either instruction, preferring the first.
+    Split(InstIdx, InstIdx),
+    /// A zero-width instruction. When this instruction matches, the input
+    /// is not advanced.
+    EmptyLook(LookInst),
+    /// Match a single possibly case insensitive character.
+    Char(OneChar),
+    /// Match one or more possibly case insensitive character ranges.
+    Ranges(CharRanges),
+}
+
+/// A single character instruction.
+#[derive(Clone, Debug)]
+pub struct OneChar {
+    /// The character.
+    pub c: char,
+    /// True if the character should be matched case insensitively.
+    /// (i.e., the input character is also compared after case folding.)
+    pub casei: bool,
+}
+
+/// A multi-range character class instruction.
+#[derive(Clone, Debug)]
+pub struct CharRanges {
+    /// Sorted sequence of non-overlapping, inclusive `(start, end)` ranges.
+    pub ranges: Vec<(char, char)>,
+    /// Whether to match case insensitively.
+    pub casei: bool,
+}
+
+/// The set of zero-width match instructions.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum LookInst {
+    /// Start of line or input.
+    StartLine,
+    /// End of line or input.
+    EndLine,
+    /// Start of input.
+    StartText,
+    /// End of input.
+    EndText,
+    /// Word character on one side and non-word character on the other.
+    WordBoundary,
+    /// Word character on both sides or non-word character on both sides.
+    NotWordBoundary,
+}
+
+impl OneChar {
+    /// Tests whether the given input character matches this instruction.
+    #[inline(always)] // About ~5-15% more throughput than `#[inline]`
+    pub fn matches(&self, c: Char) -> bool {
+        self.c == c || (self.casei && self.c == c.case_fold())
+    }
+}
+
+impl CharRanges {
+    /// Emits a range specifically for the `(?s).` expression (any character).
+    pub fn any() -> CharRanges {
+        CharRanges {
+            ranges: vec![('\x00', '\u{10ffff}')],
+            casei: false,
+        }
+    }
+
+    /// Emits a range specifically for the `.` expression (anything but `\n`).
+    pub fn any_nonl() -> CharRanges {
+        CharRanges {
+            ranges: vec![('\x00', '\x09'), ('\x0B', '\u{10ffff}')],
+            casei: false,
+        }
+    }
+
+    /// Emits a range from the AST character class.
+    pub fn from_class(cls: syntax::CharClass) -> CharRanges {
+        let casei = cls.is_case_insensitive();
+        CharRanges {
+            ranges: cls.into_iter().map(|r| (r.start, r.end)).collect(),
+            casei: casei,
+        }
+    }
+
+    /// Tests whether the given input character matches this instruction,
+    /// returning the index of the range that contains it.
+    #[inline(always)] // About ~5-15% more throughput than `#[inline]`
+    pub fn matches(&self, mut c: Char) -> Option<usize> {
+        if self.casei {
+            c = c.case_fold();
+        }
+        // This speeds up the `match_class_unicode` benchmark by checking
+        // some common cases quickly without binary search. e.g., Matching
+        // a Unicode class on predominantly ASCII text.
+        for i in 0..cmp::min(self.ranges.len(), 4) {
+            let r = self.ranges[i];
+            if c < r.0 {
+                return None;
+            }
+            if c <= r.1 {
+                return Some(i);
+            }
+        }
+        self.ranges.binary_search_by(|r| {
+            if r.1 < c {
+                Ordering::Less
+            } else if r.0 > c {
+                Ordering::Greater
+            } else {
+                Ordering::Equal
+            }
+        }).ok()
+    }
+}
+
+impl LookInst {
+    /// Tests whether this zero-width instruction matches between `c1`
+    /// (the preceding character) and `c2` (the following character).
+    pub fn matches(&self, c1: Char, c2: Char) -> bool {
+        use self::LookInst::*;
+        match *self {
+            StartLine => c1.is_none() || c1 == '\n',
+            EndLine => c2.is_none() || c2 == '\n',
+            StartText => c1.is_none(),
+            EndText => c2.is_none(),
+            ref wbty => { // only the two word-boundary variants reach this arm
+                let (w1, w2) = (c1.is_word_char(), c2.is_word_char());
+                (*wbty == WordBoundary && w1 ^ w2)
+                || (*wbty == NotWordBoundary && !(w1 ^ w2))
+            }
+        }
+    }
+}
+
+/// The matching engines offered by this regex implementation.
+///
+/// N.B. This is exported for use in testing.
+#[doc(hidden)]
+#[derive(Clone, Copy, Debug)]
+pub enum MatchEngine {
+    /// A bounded backtracking implementation. About twice as fast as the
+    /// NFA, but can only work on small regexes and small input.
+    Backtrack,
+    /// A full NFA simulation. Can always be employed but almost always the
+    /// slowest choice.
+    Nfa,
+    /// If the entire regex is a single literal and only the overall match
+    /// is requested, then we can degrade to a simple substring match.
+    Literals,
+}
+
+/// Program represents a compiled regular expression. Once an expression is
+/// compiled, its representation is immutable and will never change.
+/// (Well, almost. In fact, the matching engines cache state that can be
+/// reused on subsequent searches. But this is interior mutability that
+/// shouldn't be observable by the caller.)
+#[derive(Debug)]
+pub struct Program {
+    /// The original regular expression string.
+    pub original: String,
+    /// A sequence of instructions.
+    pub insts: Vec<Inst>,
+    /// The sequence of capture group names. There is an entry for each capture
+    /// group index and a name exists only if the capture group is named.
+    pub cap_names: Vec<Option<String>>,
+    /// If the regular expression requires a literal prefix in order to have a
+    /// match, that prefix is stored here as a `Prefix` matcher.
+    pub prefixes: Prefix,
+    /// True iff matching any literal prefix indicates a match.
+    pub prefixes_complete: bool,
+    /// True iff program is anchored at the beginning.
+    pub anchored_begin: bool,
+    /// True iff program is anchored at the end.
+    pub anchored_end: bool,
+    /// The type of matching engine to use.
+    /// When `None` (the default), pick an engine automatically.
+    pub engine: Option<MatchEngine>,
+    /// Cached NFA threads.
+    pub nfa_threads: Pool<NfaThreads>,
+    /// Cached backtracking memory.
+    pub backtrack: Pool<BackMachine>,
+}
+
+impl Program {
+    /// Compiles the regex `re` into a program and fills in prefix/anchor info.
+    pub fn new(
+        engine: Option<MatchEngine>,
+        size_limit: usize,
+        re: &str,
+    ) -> Result<Program, Error> {
+        let expr = try!(syntax::Expr::parse(re));
+        let (insts, cap_names) = try!(Compiler::new(size_limit).compile(expr));
+        let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
+        let create_threads = move || NfaThreads::new(insts_len, ncaps);
+        let create_backtrack = move || BackMachine::new();
+        let mut prog = Program {
+            original: re.into(),
+            insts: insts,
+            cap_names: cap_names,
+            prefixes: Prefix::Empty,
+            prefixes_complete: false,
+            anchored_begin: false,
+            anchored_end: false,
+            engine: engine,
+            nfa_threads: Pool::new(Box::new(create_threads)),
+            backtrack: Pool::new(Box::new(create_backtrack)),
+        };
+
+        prog.find_prefixes();
+        prog.anchored_begin = match prog.insts[1] {
+            Inst::EmptyLook(LookInst::StartText) => true,
+            _ => false,
+        };
+        prog.anchored_end = match prog.insts[prog.insts.len() - 3] {
+            Inst::EmptyLook(LookInst::EndText) => true,
+            _ => false,
+        };
+        Ok(prog)
+    }
+
+    /// Executes this program on `text` from byte offset `start`; true on match.
+    pub fn exec(
+        &self,
+        caps: &mut CaptureIdxs,
+        text: &str,
+        start: usize,
+    ) -> bool {
+        match self.choose_engine(caps.len(), text) {
+            MatchEngine::Backtrack => Backtrack::exec(self, caps, text, start),
+            MatchEngine::Nfa => Nfa::exec(self, caps, text, start),
+            MatchEngine::Literals => {
+                match self.prefixes.find(&text[start..]) {
+                    None => false,
+                    Some((s, e)) => {
+                        if caps.len() == 2 {
+                            caps[0] = Some(start + s);
+                            caps[1] = Some(start + e);
+                        }
+                        true
+                    }
+                }
+            }
+        }
+    }
+
+    fn choose_engine(&self, cap_len: usize, text: &str) -> MatchEngine {
+        // If the engine is already chosen, then we use it.
+        // But that might not be a good idea. e.g., What if `Literals` is
+        // chosen and it can't work? I guess we should probably check whether
+        // the chosen engine is appropriate or not.
+        self.engine.unwrap_or_else(|| {
+            if cap_len <= 2
+               && self.prefixes.len() == 1
+               && self.prefixes_complete {
+                // We can only use this when the regex is entirely a literal
+                // (not an alternation of literals).
+                // The reason (for now) is that the prefix DFA doesn't handle
+                // priority the same way the regex engine does.
+                // e.g., given `ab|a`, the prefix DFA would report `a` as a
+                // match in the string `ab`, when in fact, `ab` should match.
+                //
+                // But, we can still get major winnings by avoiding the
+                // matching engine for a single literal.
+                //
+                // I guess we could teach Aho-Corasick about priority, but we
+                // might as well just implement a full DFA.
+                MatchEngine::Literals
+            } else if Backtrack::should_exec(self, text) {
+                // We're only here if the input and regex combined are small.
+                MatchEngine::Backtrack
+            } else {
+                MatchEngine::Nfa
+            }
+        })
+    }
+
+    /// Returns the total number of capture groups in the regular expression.
+    /// This includes the zeroth capture.
+    pub fn num_captures(&self) -> usize {
+        num_captures(&self.insts)
+    }
+
+    /// Allocate new capture slots (two per capture group, all `None`).
+    pub fn alloc_captures(&self) -> Vec<Option<usize>> {
+        vec![None; 2 * self.num_captures()]
+    }
+
+    /// Find and store a prefix machine for the current program.
+    pub fn find_prefixes(&mut self) {
+        use self::Inst::*;
+
+        let (ps, complete) = self.prefixes_from_insts(1);
+        if ps.len() > 0 {
+            self.prefixes = Prefix::new(ps);
+            self.prefixes_complete = complete;
+            return;
+        }
+        let mut pc = 1;
+        let mut prefixes = vec![];
+        let mut pcomplete = true;
+        while let Split(x, y) = self.insts[pc] {
+            let (xps, xcomplete) = self.prefixes_from_insts(x);
+            let (yps, ycomplete) = self.prefixes_from_insts(y);
+            let mut done = false;
+            match (&self.insts[x], &self.insts[y]) {
+                // We should be able to support this. Add explicit stack. ---AG
+                (&Split(_, _), &Split(_, _)) => return,
+                (_, &Split(_, _)) if xps.len() == 0 => return,
+                (_, &Split(_, _)) => {
+                    pcomplete = pcomplete && xcomplete;
+                    prefixes.extend(xps);
+                    pc = y;
+                }
+                (&Split(_, _), _) if yps.len() == 0 => return,
+                (&Split(_, _), _) => {
+                    pcomplete = pcomplete && ycomplete;
+                    prefixes.extend(yps);
+                    pc = x;
+                }
+                _ if xps.len() == 0 || yps.len() == 0 => return,
+                // This is our base case. We've followed splits the whole
+                // way, which means both instructions lead to a match.
+                _ => {
+                    pcomplete = pcomplete && xcomplete && ycomplete;
+                    prefixes.extend(xps);
+                    prefixes.extend(yps);
+                    done = true;
+                }
+            }
+            // Arg. We've over-extended ourselves, quit with nothing to
+            // show for it.
+            if prefixes.len() > NUM_PREFIX_LIMIT {
+                return;
+            }
+            if done { break; }
+        }
+        self.prefixes_complete = pcomplete;
+        self.prefixes = Prefix::new(prefixes);
+    }
+
+    /// Find a prefix starting at the given instruction.
+    ///
+    /// Returns `true` in the tuple if the end of the prefix leads trivially
+    /// to a match. (This may report false negatives, but being conservative
+    /// is OK.)
+    fn prefixes_from_insts(&self, mut pc: usize) -> (Vec<String>, bool) {
+        use self::Inst::*;
+
+        let mut complete = true;
+        let mut alts = vec![String::new()];
+        while pc < self.insts.len() {
+            let inst = &self.insts[pc];
+
+            // Each iteration adds one character to every alternate prefix *or*
+            // it stops. Thus, the prefix alternates grow in lock step, and it
+            // suffices to check one of them to see if the prefix limit has been
+            // exceeded.
+            if alts[0].len() > PREFIX_LENGTH_LIMIT {
+                complete = false;
+                break;
+            }
+            match *inst {
+                Save(_) => { pc += 1; continue } // completely ignore it
+                Char(OneChar { c, casei: false }) => {
+                    for alt in &mut alts {
+                        alt.push(c);
+                    }
+                    pc += 1;
+                }
+                Ranges(CharRanges { ref ranges, casei: false }) => {
+                    let nchars = num_chars_in_ranges(ranges);
+                    if alts.len() * nchars > NUM_PREFIX_LIMIT {
+                        complete = false;
+                        break;
+                    }
+
+                    let orig = alts;
+                    alts = Vec::with_capacity(orig.len());
+                    for &(s, e) in ranges {
+                        for c in (s as u32)..(e as u32 + 1){
+                            for alt in &orig {
+                                let mut alt = alt.clone();
+                                alt.push(::std::char::from_u32(c).unwrap());
+                                alts.push(alt);
+                            }
+                        }
+                    }
+                    pc += 1;
+                }
+                Jump(pc2) => pc = pc2,
+                _ => { complete = self.leads_to_match(pc); break }
+            }
+        }
+        if alts[0].len() == 0 {
+            (vec![], false)
+        } else {
+            (alts, complete)
+        }
+    }
+
+    fn leads_to_match(&self, mut pc: usize) -> bool {
+        // I'm pretty sure this is conservative, so it might have some
+        // false negatives.
+        loop {
+            match self.insts[pc] {
+                Inst::Match => return true,
+                Inst::Save(_) => pc += 1,
+                Inst::Jump(pc2) => pc = pc2,
+                _ => return false,
+            }
+        }
+    }
+}
+
+impl Clone for Program {
+    fn clone(&self) -> Program { // pooled state is not copied; see the new pools below
+        let (insts_len, ncaps) = (self.insts.len(), self.num_captures());
+        let create_threads = move || NfaThreads::new(insts_len, ncaps);
+        let create_backtrack = move || BackMachine::new();
+        Program {
+            original: self.original.clone(),
+            insts: self.insts.clone(),
+            cap_names: self.cap_names.clone(),
+            prefixes: self.prefixes.clone(),
+            prefixes_complete: self.prefixes_complete,
+            anchored_begin: self.anchored_begin,
+            anchored_end: self.anchored_end,
+            engine: self.engine,
+            nfa_threads: Pool::new(Box::new(create_threads)), // fresh, empty pool
+            backtrack: Pool::new(Box::new(create_backtrack)), // fresh, empty pool
+        }
+    }
+}
+
+/// Return the number of captures in the given sequence of instructions.
+fn num_captures(insts: &[Inst]) -> usize {
+    let mut n = 0; // becomes one more than the highest `Save` slot seen
+    for inst in insts {
+        match *inst {
+            Inst::Save(c) => n = cmp::max(n, c+1),
+            _ => {}
+        }
+    }
+    // There are exactly 2 Save slots for every capture.
+    n / 2
+}
+
+/// Count the number of characters in the given inclusive ranges.
+///
+/// This is useful for pre-emptively limiting the number of prefix literals
+/// we extract from a regex program.
+fn num_chars_in_ranges(ranges: &[(char, char)]) -> usize {
+    ranges.iter()
+          .map(|&(s, e)| 1 + (e as u32) - (s as u32)) // both ends are included
+          .fold(0, |acc, len| acc + len) as usize
+}
diff --git a/src/re.rs b/src/re.rs
index f3eb7b19b1..556ad8f83d 100644
--- a/src/re.rs
+++ b/src/re.rs
@@ -1,4 +1,4 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -16,14 +16,21 @@ use std::fmt;
 use std::str::pattern::{Pattern, Searcher, SearchStep};
 use std::str::FromStr;
 
-use compile::Program;
+use program::{Program, MatchEngine};
 use syntax;
-use vm;
-use vm::CaptureLocs;
-use vm::MatchKind::{self, Exists, Location, Submatches};
 
-use self::NamesIter::*;
-use self::Regex::*;
+const REPLACE_EXPAND: &'static str = r"(?x)
+  (?P<before>^|\b|[^$]) # Ignore `$$name`.
+  \$
+  (?P<name> # Match the actual capture name. Can be...
+    [0-9]+  # A sequence of digits (for indexed captures), or...
+    |
+    [_a-zA-Z][_0-9a-zA-Z]* # A name for named captures.
+  )
+";
+
+/// Type alias for representing capture indices.
+pub type CaptureIdxs = [Option<usize>];
 
 /// Escapes all regular expression meta characters in `text`.
 ///
@@ -166,20 +173,11 @@ pub enum Regex {
     // See the comments for the `program` module in `lib.rs` for a more
     // detailed explanation for what `regex!` requires.
     #[doc(hidden)]
-    Dynamic(ExDynamic),
+    Dynamic(Program),
     #[doc(hidden)]
     Native(ExNative),
 }
 
-#[derive(Clone)]
-#[doc(hidden)]
-pub struct ExDynamic {
-    original: String,
-    names: Vec<Option<String>>,
-    #[doc(hidden)]
-    pub prog: Program
-}
-
 #[doc(hidden)]
 pub struct ExNative {
     #[doc(hidden)]
@@ -187,7 +185,7 @@ pub struct ExNative {
     #[doc(hidden)]
     pub names: &'static &'static [Option<&'static str>],
     #[doc(hidden)]
-    pub prog: fn(MatchKind, &str, usize, usize) -> Vec<Option<usize>>
+    pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool,
 }
 
 impl Copy for ExNative {}
@@ -250,13 +248,29 @@ impl Regex {
     ///
     /// The default size limit used in `new` is 10MB.
     pub fn with_size_limit(size: usize, re: &str) -> Result<Regex, Error> {
-        let ast = try!(syntax::Expr::parse(re));
-        let (prog, names) = try!(Program::new(ast, size));
-        Ok(Dynamic(ExDynamic {
-            original: re.to_string(),
-            names: names,
-            prog: prog,
-        }))
+        Regex::with_engine(None, size, re)
+    }
+
+    /// Compiles a dynamic regular expression and uses the given matching engine.
+    ///
+    /// This is exposed for use in testing and shouldn't be used by clients.
+    /// Instead, the regex program should choose the correct matching engine
+    /// to use automatically. (Based on the regex, the size of the input and
+    /// the type of search.)
+    ///
+    /// A value of `None` means that the engine is automatically selected,
+    /// which is the default behavior.
+    ///
+    /// **WARNING**: Passing an unsuitable engine for the given regex/input
+    /// could lead to bad things. (Not unsafe things, but panics, incorrect
+    /// matches and large memory use are all things that could happen.)
+    #[doc(hidden)]
+    pub fn with_engine(
+        engine: Option<MatchEngine>,
+        size: usize,
+        re: &str,
+    ) -> Result<Regex, Error> {
+        Program::new(engine, size, re).map(Regex::Dynamic)
     }
 
 
@@ -271,12 +285,11 @@ impl Regex {
     /// # extern crate regex; use regex::Regex;
     /// # fn main() {
     /// let text = "I categorically deny having triskaidekaphobia.";
-    /// let matched = Regex::new(r"\b\w{13}\b").unwrap().is_match(text);
-    /// assert!(matched);
+    /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
     /// # }
     /// ```
     pub fn is_match(&self, text: &str) -> bool {
-        has_match(&exec(self, Exists, text))
+        exec(self, &mut [], text, 0)
     }
 
     /// Returns the start and end byte range of the leftmost-first match in
@@ -300,8 +313,8 @@ impl Regex {
     /// # }
     /// ```
     pub fn find(&self, text: &str) -> Option<(usize, usize)> {
-        let caps = exec(self, Location, text);
-        if has_match(&caps) {
+        let mut caps = [None, None];
+        if exec(self, &mut caps, text, 0) {
             Some((caps[0].unwrap(), caps[1].unwrap()))
         } else {
             None
@@ -392,8 +405,12 @@ impl Regex {
     /// The `0`th capture group is always unnamed, so it must always be
     /// accessed with `at(0)`.
     pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
-        let caps = exec(self, Submatches, text);
-        Captures::new(self, text, caps)
+        let mut caps = self.alloc_captures();
+        if exec(self, &mut caps, text, 0) {
+            Some(Captures::new(self, text, caps))
+        } else {
+            None
+        }
     }
 
     /// Returns an iterator over all the non-overlapping capture groups matched
@@ -579,17 +596,29 @@ impl Regex {
         let mut new = String::with_capacity(text.len());
         let mut last_match = 0;
 
-        for (i, cap) in self.captures_iter(text).enumerate() {
-            // It'd be nicer to use the 'take' iterator instead, but it seemed
-            // awkward given that '0' => no limit.
-            if limit > 0 && i >= limit {
-                break
+        if rep.no_expand().is_some() {
+            // borrow checker pains. `rep` is borrowed mutably in the `else`
+            // branch below.
+            let rep = rep.no_expand().unwrap();
+            for (i, (s, e)) in self.find_iter(text).enumerate() {
+                if limit > 0 && i >= limit {
+                    break
+                }
+                new.push_str(&text[last_match..s]);
+                new.push_str(&rep);
+                last_match = e;
+            }
+        } else {
+            for (i, cap) in self.captures_iter(text).enumerate() {
+                if limit > 0 && i >= limit {
+                    break
+                }
+                // unwrap on 0 is OK because captures only reports matches
+                let (s, e) = cap.pos(0).unwrap();
+                new.push_str(&text[last_match..s]);
+                new.push_str(&rep.reg_replace(&cap));
+                last_match = e;
             }
-
-            let (s, e) = cap.pos(0).unwrap(); // captures only reports matches
-            new.push_str(&text[last_match..s]);
-            new.push_str(&rep.reg_replace(&cap));
-            last_match = e;
         }
         new.push_str(&text[last_match..]);
         return new;
@@ -598,31 +627,37 @@ impl Regex {
     /// Returns the original string of this regex.
     pub fn as_str<'a>(&'a self) -> &'a str {
         match *self {
-            Dynamic(ExDynamic { ref original, .. }) => original,
-            Native(ExNative { ref original, .. }) => original,
+            Regex::Dynamic(Program { ref original, .. }) => original,
+            Regex::Native(ExNative { ref original, .. }) => original,
         }
     }
 
     #[doc(hidden)]
     pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
         match *self {
-            Native(ref n) => NamesIterNative(n.names.iter()),
-            Dynamic(ref d) => NamesIterDynamic(d.names.iter())
+            Regex::Native(ref n) => NamesIter::Native(n.names.iter()),
+            Regex::Dynamic(ref d) => NamesIter::Dynamic(d.cap_names.iter())
         }
     }
 
     fn names_len(&self) -> usize {
         match *self {
-            Native(ref n) => n.names.len(),
-            Dynamic(ref d) => d.names.len()
+            Regex::Native(ref n) => n.names.len(),
+            Regex::Dynamic(ref d) => d.cap_names.len()
         }
     }
 
+    fn alloc_captures(&self) -> Vec<Option<usize>> {
+        match *self {
+            Regex::Native(ref n) => vec![None; 2 * n.names.len()],
+            Regex::Dynamic(ref d) => d.alloc_captures(),
+        }
+    }
 }
 
 pub enum NamesIter<'a> {
-    NamesIterNative(::std::slice::Iter<'a, Option<&'static str>>),
-    NamesIterDynamic(::std::slice::Iter<'a, Option<String>>)
+    Native(::std::slice::Iter<'a, Option<&'static str>>),
+    Dynamic(::std::slice::Iter<'a, Option<String>>)
 }
 
 impl<'a> Iterator for NamesIter<'a> {
@@ -630,8 +665,10 @@ impl<'a> Iterator for NamesIter<'a> {
 
     fn next(&mut self) -> Option<Option<String>> {
         match *self {
-            NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_string())),
-            NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_string())),
+            NamesIter::Native(ref mut i) =>
+                i.next().map(|x| x.map(|s| s.to_owned())),
+            NamesIter::Dynamic(ref mut i) =>
+                i.next().map(|x| x.as_ref().map(|s| s.to_owned())),
         }
     }
 }
@@ -653,24 +690,39 @@ pub trait Replacer {
     /// The `'a` lifetime refers to the lifetime of a borrowed string when
     /// a new owned string isn't needed (e.g., for `NoExpand`).
     fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str>;
+
+    /// Returns a possibly owned string that never needs expansion.
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> { None }
 }
 
 impl<'t> Replacer for NoExpand<'t> {
     fn reg_replace<'a>(&'a mut self, _: &Captures) -> Cow<'a, str> {
-        let NoExpand(s) = *self;
-        Cow::Borrowed(s)
+        self.0.into()
+    }
+
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> {
+        Some(self.0.into())
     }
 }
 
 impl<'t> Replacer for &'t str {
     fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str> {
-        Cow::Owned(caps.expand(*self))
+        caps.expand(*self).into()
+    }
+
+    /// Returns the text itself when it has no `$`, i.e. nothing to expand.
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> {
+        // `$name` expands and `$$` unescapes to `$`: any `$` needs expansion.
+        if self.contains('$') {
+            return None;
+        }
+        Some((*self).into())
     }
 }
 
 impl<F> Replacer for F where F: FnMut(&Captures) -> String {
     fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str> {
-        Cow::Owned((*self)(caps))
+        (*self)(caps).into()
     }
 }
 
@@ -750,37 +802,33 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
 /// `'t` is the lifetime of the matched text.
 pub struct Captures<'t> {
     text: &'t str,
-    locs: CaptureLocs,
+    locs: Vec<Option<usize>>,
     named: Option<HashMap<String, usize>>,
 }
 
 impl<'t> Captures<'t> {
-    fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
-          -> Option<Captures<'t>> {
-        if !has_match(&locs) {
-            return None
-        }
-
+    fn new(
+        re: &Regex,
+        search: &'t str,
+        locs: Vec<Option<usize>>,
+    ) -> Captures<'t> {
         let named =
             if re.names_len() == 0 {
                 None
             } else {
                 let mut named = HashMap::new();
                 for (i, name) in re.names_iter().enumerate() {
-                    match name {
-                        None => {},
-                        Some(name) => {
-                            named.insert(name, i);
-                        }
+                    if let Some(name) = name {
+                        named.insert(name, i);
                     }
                 }
                 Some(named)
             };
-        Some(Captures {
+        Captures {
             text: search,
             locs: locs,
             named: named,
-        })
+        }
     }
 
     /// Returns the start and end positions of the Nth capture group.
@@ -856,15 +904,7 @@ impl<'t> Captures<'t> {
     /// To write a literal `$` use `$$`.
     pub fn expand(&self, text: &str) -> String {
         // How evil can you get?
-        let re = Regex::new(r"(?x)
-          (?P<before>^|\b|[^$]) # Ignore `$$name`.
-          \$
-          (?P<name> # Match the actual capture name. Can be...
-            [0-9]+  # A sequence of digits (for indexed captures), or...
-            |
-            [_a-zA-Z][_0-9a-zA-Z]* # A name for named captures.
-          )
-        ").unwrap();
+        let re = Regex::new(REPLACE_EXPAND).unwrap();
         let text = re.replace_all(text, |refs: &Captures| -> String {
             let before = refs.name("before").unwrap_or("");
             let name = refs.name("name").unwrap_or("");
@@ -974,14 +1014,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
             return None
         }
 
-        let caps = exec_slice(self.re, Submatches, self.search,
-                              self.last_end, self.search.len());
-        let (s, e) =
-            if !has_match(&caps) {
-                return None
-            } else {
-                (caps[0].unwrap(), caps[1].unwrap())
-            };
+        let mut caps = self.re.alloc_captures();
+        if !exec(self.re, &mut caps, self.search, self.last_end) {
+            return None
+        }
+        let (s, e) = (caps[0].unwrap(), caps[1].unwrap());
 
         // Don't accept empty matches immediately following a match.
         // i.e., no infinite loops please.
@@ -995,7 +1032,7 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
         }
         self.last_end = e;
         self.last_match = Some(self.last_end);
-        Captures::new(self.re, self.search, caps)
+        Some(Captures::new(self.re, self.search, caps))
     }
 }
 
@@ -1022,14 +1059,11 @@ impl<'r, 't> Iterator for FindMatches<'r, 't> {
             return None
         }
 
-        let caps = exec_slice(self.re, Location, self.search,
-                              self.last_end, self.search.len());
-        let (s, e) =
-            if !has_match(&caps) {
-                return None
-            } else {
-                (caps[0].unwrap(), caps[1].unwrap())
-            };
+        let mut caps = [None, None];
+        if !exec(self.re, &mut caps, self.search, self.last_end) {
+            return None;
+        }
+        let (s, e) = (caps[0].unwrap(), caps[1].unwrap());
 
         // Don't accept empty matches immediately following a match.
         // i.e., no infinite loops please.
@@ -1106,19 +1140,9 @@ unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
     }
 }
 
-fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
-    exec_slice(re, which, input, 0, input.len())
-}
-
-fn exec_slice(re: &Regex, which: MatchKind,
-              input: &str, s: usize, e: usize) -> CaptureLocs {
+fn exec(re: &Regex, caps: &mut CaptureIdxs, text: &str, start: usize) -> bool {
     match *re {
-        Dynamic(ExDynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
-        Native(ExNative { ref prog, .. }) => (*prog)(which, input, s, e),
+        Regex::Native(ExNative { ref prog, .. }) => (*prog)(caps, text, start),
+        Regex::Dynamic(ref prog) => prog.exec(caps, text, start),
     }
 }
-
-#[inline]
-fn has_match(caps: &CaptureLocs) -> bool {
-    caps.len() >= 2 && caps[0].is_some() && caps[1].is_some()
-}
diff --git a/src/vm.rs b/src/vm.rs
deleted file mode 100644
index 7fcd7fded8..0000000000
--- a/src/vm.rs
+++ /dev/null
@@ -1,531 +0,0 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
-// VM that simulates a DFA.
-//
-// According to Russ Cox[1], a DFA performs better than an NFA, principally
-// because it reuses states previously computed by the machine *and* doesn't
-// keep track of capture groups. The drawback of a DFA (aside from its
-// complexity) is that it can't accurately return the locations of submatches.
-// The NFA *can* do that. (This is my understanding anyway.)
-//
-// Cox suggests that a DFA ought to be used to answer "does this match" and
-// "where does it match" questions. (In the latter, the starting position of
-// the match is computed by executing the regex backwards.) Cox also suggests
-// that a DFA should be run when asking "where are the submatches", which can
-// 1) quickly answer "no" is there's no match and 2) discover the substring
-// that matches, which means running the NFA on smaller input.
-//
-// Currently, the NFA simulation implemented below does some dirty tricks to
-// avoid tracking capture groups when they aren't needed (which only works
-// for 'is_match', not 'find'). This is a half-measure, but does provide some
-// perf improvement.
-//
-// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
-//
-// [1] - http://swtch.com/~rsc/regex/regex3.html
-
-use self::MatchKind::*;
-use self::StepState::*;
-
-use std::cmp;
-use std::mem;
-
-use compile::Program;
-use compile::Inst::*;
-use syntax;
-
-pub type CaptureLocs = Vec<Option<usize>>;
-
-/// Indicates the type of match to be performed by the VM.
-#[derive(Copy, Clone)]
-pub enum MatchKind {
-    /// Only checks if a match exists or not. Does not return location.
-    Exists,
-    /// Returns the start and end indices of the entire match in the input
-    /// given.
-    Location,
-    /// Returns the start and end indices of each submatch in the input given.
-    Submatches,
-}
-
-/// Runs an NFA simulation on the compiled expression given on the search text
-/// `input`. The search begins at byte index `start` and ends at byte index
-/// `end`. (The range is specified here so that zero-width assertions will work
-/// correctly when searching for successive non-overlapping matches.)
-///
-/// The `which` parameter indicates what kind of capture information the caller
-/// wants. There are three choices: match existence only, the location of the
-/// entire match or the locations of the entire match in addition to the
-/// locations of each submatch.
-pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
-                   start: usize, end: usize) -> CaptureLocs {
-    Nfa {
-        which: which,
-        prog: prog,
-        input: input,
-        start: start,
-        end: end,
-        ic: 0,
-        chars: CharReader::new(input),
-    }.run()
-}
-
-struct Nfa<'r, 't> {
-    which: MatchKind,
-    prog: &'r Program,
-    input: &'t str,
-    start: usize,
-    end: usize,
-    ic: usize,
-    chars: CharReader<'t>,
-}
-
-/// Indicates the next action to take after a single non-empty instruction
-/// is processed.
-#[derive(Copy, Clone)]
-pub enum StepState {
-    /// This is returned if and only if a Match instruction is reached and
-    /// we only care about the existence of a match. It instructs the VM to
-    /// quit early.
-    StepMatchEarlyReturn,
-    /// Indicates that a match was found. Thus, the rest of the states in the
-    /// *current* queue should be dropped (i.e., leftmost-first semantics).
-    /// States in the "next" queue can still be processed.
-    StepMatch,
-    /// No match was found. Continue with the next state in the queue.
-    StepContinue,
-}
-
-impl<'r, 't> Nfa<'r, 't> {
-    fn run(&mut self) -> CaptureLocs {
-        let ncaps = match self.which {
-            Exists => 0,
-            Location => 1,
-            Submatches => self.prog.num_captures(),
-        };
-        let mut matched = false;
-        let ninsts = self.prog.insts.len();
-        let mut clist = Threads::new(self.which, ninsts, ncaps);
-        let mut nlist = Threads::new(self.which, ninsts, ncaps);
-        let mut groups = vec![None; ncaps * 2];
-
-        // Determine if the expression starts with a '^' so we can avoid
-        // simulating .*?
-        // Make sure multi-line mode isn't enabled for it, otherwise we can't
-        // drop the initial .*?
-        let prefix_anchor = match self.prog.insts[1] {
-            StartText => true,
-            _ => false,
-        };
-
-        self.ic = self.start;
-        let mut next_ic = self.chars.set(self.start);
-        while self.ic <= self.end {
-            if clist.size == 0 {
-                // We have a match and we're done exploring alternatives.
-                // Time to quit.
-                if matched {
-                    break
-                }
-
-                // If the expression starts with a '^' we can terminate as soon
-                // as the last thread dies.
-                if self.ic != 0 && prefix_anchor {
-                    break;
-                }
-
-                // If there are no threads to try, then we'll have to start
-                // over at the beginning of the regex.
-                // BUT, if there's a literal prefix for the program, try to
-                // jump ahead quickly. If it can't be found, then we can bail
-                // out early.
-                if self.prog.prefix.len() > 0 {
-                    let needle = self.prog.prefix.as_bytes();
-                    let haystack = &self.input.as_bytes()[self.ic..];
-                    match find_prefix(needle, haystack) {
-                        None => break,
-                        Some(i) => {
-                            self.ic += i;
-                            next_ic = self.chars.set(self.ic);
-                        }
-                    }
-                }
-            }
-
-            // This simulates a preceding '.*?' for every regex by adding
-            // a state starting at the current position in the input for the
-            // beginning of the program only if we don't already have a match.
-            if clist.size == 0 || (!prefix_anchor && !matched) {
-                self.add(&mut clist, 0, &mut groups)
-            }
-
-            // Now we try to read the next character.
-            // As a result, the 'step' method will look at the previous
-            // character.
-            self.ic = next_ic;
-            next_ic = self.chars.advance();
-
-            for i in 0..clist.size {
-                let pc = clist.pc(i);
-                let step_state = self.step(&mut groups, &mut nlist,
-                                           clist.groups(i), pc);
-                match step_state {
-                    StepMatchEarlyReturn => return vec![Some(0), Some(0)],
-                    StepMatch => { matched = true; break },
-                    StepContinue => {},
-                }
-            }
-            mem::swap(&mut clist, &mut nlist);
-            nlist.empty();
-        }
-        match self.which {
-            Exists if matched     => vec![Some(0), Some(0)],
-            Exists                => vec![None, None],
-            Location | Submatches => groups,
-        }
-    }
-
-    fn step(&self, groups: &mut [Option<usize>], nlist: &mut Threads,
-            caps: &mut [Option<usize>], pc: usize)
-           -> StepState {
-        match self.prog.insts[pc] {
-            Match => {
-                match self.which {
-                    Exists => {
-                        return StepMatchEarlyReturn
-                    }
-                    Location => {
-                        groups[0] = caps[0];
-                        groups[1] = caps[1];
-                        return StepMatch
-                    }
-                    Submatches => {
-                        for (slot, val) in groups.iter_mut().zip(caps.iter()) {
-                            *slot = *val;
-                        }
-                        return StepMatch
-                    }
-                }
-            }
-            OneChar { c, casei } => {
-                if self.char_eq(casei, self.chars.prev, c) {
-                    self.add(nlist, pc+1, caps);
-                }
-            }
-            CharClass(ref cls) => {
-                if self.chars.prev.map(|c| cls.matches(c)).unwrap_or(false) {
-                    self.add(nlist, pc+1, caps);
-                }
-            }
-            Any => self.add(nlist, pc+1, caps),
-            AnyNoNL => {
-                if !self.char_eq(false, self.chars.prev, '\n') {
-                    self.add(nlist, pc+1, caps)
-                }
-            }
-            StartLine | EndLine | StartText | EndText
-            | WordBoundary | NotWordBoundary
-            | Save(_) | Jump(_) | Split(_, _) => {},
-        }
-        StepContinue
-    }
-
-    fn add(&self, nlist: &mut Threads, pc: usize, groups: &mut [Option<usize>]) {
-        if nlist.contains(pc) {
-            return
-        }
-        // We have to add states to the threads list even if their empty.
-        // TL;DR - It prevents cycles.
-        // If we didn't care about cycles, we'd *only* add threads that
-        // correspond to non-jumping instructions (OneChar, Any, Match, etc.).
-        // But, it's possible for valid regexs (like '(a*)*') to result in
-        // a cycle in the instruction list. e.g., We'll keep chasing the Split
-        // instructions forever.
-        // So we add these instructions to our thread queue, but in the main
-        // VM loop, we look for them but simply ignore them.
-        // Adding them to the queue prevents them from being revisited so we
-        // can avoid cycles (and the inevitable stack overflow).
-        //
-        // We make a minor optimization by indicating that the state is "empty"
-        // so that its capture groups are not filled in.
-        match self.prog.insts[pc] {
-            StartLine => {
-                nlist.add(pc, groups, true);
-                if self.chars.is_begin() || self.char_is(self.chars.prev, '\n') {
-                    self.add(nlist, pc + 1, groups);
-                }
-            }
-            StartText => {
-                nlist.add(pc, groups, true);
-                if self.chars.is_begin() {
-                    self.add(nlist, pc + 1, groups);
-                }
-            }
-            EndLine => {
-                nlist.add(pc, groups, true);
-                if self.chars.is_end() || self.char_is(self.chars.cur, '\n') {
-                    self.add(nlist, pc + 1, groups)
-                }
-            }
-            EndText => {
-                nlist.add(pc, groups, true);
-                if self.chars.is_end() {
-                    self.add(nlist, pc + 1, groups)
-                }
-            }
-            WordBoundary => {
-                nlist.add(pc, groups, true);
-                if self.chars.is_word_boundary() {
-                    self.add(nlist, pc + 1, groups);
-                }
-            }
-            NotWordBoundary => {
-                nlist.add(pc, groups, true);
-                if !self.chars.is_word_boundary() {
-                    self.add(nlist, pc + 1, groups);
-                }
-            }
-            Save(slot) => {
-                nlist.add(pc, groups, true);
-                match self.which {
-                    Location if slot <= 1 => {
-                        let old = groups[slot];
-                        groups[slot] = Some(self.ic);
-                        self.add(nlist, pc + 1, groups);
-                        groups[slot] = old;
-                    }
-                    Submatches => {
-                        let old = groups[slot];
-                        groups[slot] = Some(self.ic);
-                        self.add(nlist, pc + 1, groups);
-                        groups[slot] = old;
-                    }
-                    Exists | Location => self.add(nlist, pc + 1, groups),
-                }
-            }
-            Jump(to) => {
-                nlist.add(pc, groups, true);
-                self.add(nlist, to, groups)
-            }
-            Split(x, y) => {
-                nlist.add(pc, groups, true);
-                self.add(nlist, x, groups);
-                self.add(nlist, y, groups);
-            }
-            Match | OneChar{..} | CharClass(_) | Any | AnyNoNL => {
-                nlist.add(pc, groups, false);
-            }
-        }
-    }
-
-    // Use Unicode simple case folding for case insensitive comparisons,
-    // as we’re matching individual code points.
-    #[inline]
-    fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
-        match textc {
-            None => false,
-            Some(textc) => {
-                regc == textc || (casei && syntax::simple_case_fold(regc) == syntax::simple_case_fold(textc))
-            }
-        }
-    }
-
-    #[inline]
-    fn char_is(&self, textc: Option<char>, regc: char) -> bool {
-        textc == Some(regc)
-    }
-}
-
-/// CharReader is responsible for maintaining a "previous" and a "current"
-/// character. This one-character lookahead is necessary for assertions that
-/// look one character before or after the current position.
-pub struct CharReader<'t> {
-    /// The previous character read. It is None only when processing the first
-    /// character of the input.
-    pub prev: Option<char>,
-    /// The current character.
-    pub cur: Option<char>,
-    input: &'t str,
-    next: usize,
-}
-
-impl<'t> CharReader<'t> {
-    /// Returns a new CharReader that advances through the input given.
-    /// Note that a CharReader has no knowledge of the range in which to search
-    /// the input.
-    pub fn new(input: &'t str) -> CharReader<'t> {
-        CharReader {
-            prev: None,
-            cur: None,
-            input: input,
-            next: 0,
-       }
-    }
-
-    /// Sets the previous and current character given any arbitrary byte
-    /// index (at a Unicode codepoint boundary).
-    #[inline]
-    pub fn set(&mut self, ic: usize) -> usize {
-        self.prev = None;
-        self.cur = None;
-        self.next = 0;
-
-        if self.input.len() == 0 {
-            return 1
-        }
-        if ic > 0 {
-            let i = cmp::min(ic, self.input.len());
-            self.prev = self.input[..i].chars().rev().next();
-        }
-        if ic < self.input.len() {
-            let cur = self.input[ic..].chars().next().unwrap();
-            self.cur = Some(cur);
-            self.next = ic + cur.len_utf8();
-            self.next
-        } else {
-            self.input.len() + 1
-        }
-    }
-
-    /// Does the same as `set`, except it always advances to the next
-    /// character in the input (and therefore does half as many UTF8 decodings).
-    #[inline]
-    pub fn advance(&mut self) -> usize {
-        self.prev = self.cur;
-        if self.next < self.input.len() {
-            let cur = self.input[self.next..].chars().next().unwrap();
-            self.cur = Some(cur);
-            self.next += cur.len_utf8();
-        } else {
-            self.cur = None;
-            self.next = self.input.len() + 1;
-        }
-        self.next
-    }
-
-    /// Returns true if and only if this is the beginning of the input
-    /// (ignoring the range of the input to search).
-    #[inline]
-    pub fn is_begin(&self) -> bool { self.prev.is_none() }
-
-    /// Returns true if and only if this is the end of the input
-    /// (ignoring the range of the input to search).
-    #[inline]
-    pub fn is_end(&self) -> bool { self.cur.is_none() }
-
-    /// Returns true if and only if the current position is a word boundary.
-    /// (Ignoring the range of the input to search.)
-    pub fn is_word_boundary(&self) -> bool {
-        fn is_word(c: Option<char>) -> bool {
-            c.map(syntax::is_word_char).unwrap_or(false)
-        }
-
-        if self.is_begin() {
-            return is_word(self.cur);
-        }
-        if self.is_end() {
-            return is_word(self.prev);
-        }
-        (is_word(self.cur) && !is_word(self.prev))
-        || (is_word(self.prev) && !is_word(self.cur))
-    }
-}
-
-#[derive(Clone)]
-struct Thread {
-    pc: usize,
-    groups: Vec<Option<usize>>,
-}
-
-struct Threads {
-    which: MatchKind,
-    queue: Vec<Thread>,
-    sparse: Vec<usize>,
-    size: usize,
-}
-
-impl Threads {
-    // This is using a wicked neat trick to provide constant time lookup
-    // for threads in the queue using a sparse set. A queue of threads is
-    // allocated once with maximal size when the VM initializes and is reused
-    // throughout execution. That is, there should be zero allocation during
-    // the execution of a VM.
-    //
-    // See http://research.swtch.com/sparse for the deets.
-    fn new(which: MatchKind, num_insts: usize, ncaps: usize) -> Threads {
-        let t = Thread { pc: 0, groups: vec![None; ncaps * 2] };
-        Threads {
-            which: which,
-            queue: vec![t; num_insts],
-            sparse: vec![0; num_insts],
-            size: 0,
-        }
-    }
-
-    fn add(&mut self, pc: usize, groups: &[Option<usize>], empty: bool) {
-        let t = &mut self.queue[self.size];
-        t.pc = pc;
-        match (empty, self.which) {
-            (_, Exists) | (true, _) => {},
-            (false, Location) => {
-                t.groups[0] = groups[0];
-                t.groups[1] = groups[1];
-            }
-            (false, Submatches) => {
-                for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
-                    *slot = *val;
-                }
-            }
-        }
-        self.sparse[pc] = self.size;
-        self.size += 1;
-    }
-
-    #[inline]
-    fn contains(&self, pc: usize) -> bool {
-        let s = self.sparse[pc];
-        s < self.size && self.queue[s].pc == pc
-    }
-
-    #[inline]
-    fn empty(&mut self) {
-        self.size = 0;
-    }
-
-    #[inline]
-    fn pc(&self, i: usize) -> usize {
-        self.queue[i].pc
-    }
-
-    #[inline]
-    fn groups(&mut self, i: usize) -> &mut [Option<usize>] {
-        &mut self.queue[i].groups
-    }
-}
-
-/// Returns the starting location of `needle` in `haystack`.
-/// If `needle` is not in `haystack`, then `None` is returned.
-///
-/// Note that this is using a naive substring algorithm.
-#[inline]
-pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<usize> {
-    let (hlen, nlen) = (haystack.len(), needle.len());
-    if nlen > hlen || nlen == 0 {
-        return None
-    }
-    for (offset, window) in haystack.windows(nlen).enumerate() {
-        if window == needle {
-            return Some(offset)
-        }
-    }
-    None
-}