Skip to content

Commit

Permalink
Merge branch 'jneem-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
BurntSushi committed Sep 4, 2016
2 parents 0be0b4c + 56584ad commit 12cb63b
Showing 1 changed file with 37 additions and 56 deletions.
93 changes: 37 additions & 56 deletions src/simd_accel/teddy128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ better. Namely:
1. Teddy's core algorithm scans the haystack in 16 byte chunks. 16 is
significant because it corresponds to the number of bytes in a SIMD vector.
If one used AVX2 instructions, then we could scan the haystack in 32 byte
chunks. Similarly, if one used AVX512 instructions, we could sca the
chunks. Similarly, if one used AVX512 instructions, we could scan the
haystack in 64 byte chunks. Hyperscan implements SIMD + AVX2, we only
implement SIMD for the moment. (The author doesn't have a CPU with AVX2
support... yet.)
Expand Down Expand Up @@ -326,11 +326,10 @@ References
// TODO: Make the inner loop do aligned loads.

use std::cmp;
use std::mem::transmute;
use std::ptr;

use simd::u8x16;
use simd::x86::sse2::u64x2;
use simd::x86::sse2::Sse2Bool8ix16;
use simd::x86::ssse3::Ssse3U8x16;

use syntax;
Expand Down Expand Up @@ -456,8 +455,9 @@ impl Teddy {
// N.B. `res0` is our `C` in the module documentation.
let res0 = self.masks.members1(h);
// Only do expensive verification if there are any non-zero bits.
if res0.ne(zero).any() {
if let Some(m) = self.verify_128(haystack, pos, res0) {
let bitfield = res0.ne(zero).move_mask();
if bitfield != 0 {
if let Some(m) = self.verify(haystack, pos, res0, bitfield) {
return Some(m);
}
}
Expand Down Expand Up @@ -510,9 +510,11 @@ impl Teddy {
// `AND`'s our `C` values together.
let res = res0prev0 & res1;
prev0 = res0;
if res.ne(zero).any() {

let bitfield = res.ne(zero).move_mask();
if bitfield != 0 {
let pos = pos.checked_sub(1).unwrap();
if let Some(m) = self.verify_128(haystack, pos, res) {
if let Some(m) = self.verify(haystack, pos, res, bitfield) {
return Some(m);
}
}
Expand Down Expand Up @@ -568,9 +570,11 @@ impl Teddy {

prev0 = res0;
prev1 = res1;
if res.ne(zero).any() {

let bitfield = res.ne(zero).move_mask();
if bitfield != 0 {
let pos = pos.checked_sub(2).unwrap();
if let Some(m) = self.verify_128(haystack, pos, res) {
if let Some(m) = self.verify(haystack, pos, res, bitfield) {
return Some(m);
}
}
Expand All @@ -585,63 +589,40 @@ impl Teddy {

/// Runs the verification procedure on `res` (i.e., `C` from the module
/// documentation), where the haystack block starts at `pos` in
/// `haystack`.
/// `haystack`. `bitfield` has ones in the bit positions that `res` has
/// non-zero bytes.
///
/// If a match exists, it returns the first one.
#[inline(always)]
fn verify_128(
fn verify(
&self,
haystack: &[u8],
pos: usize,
res: u8x16,
mut bitfield: u32,
) -> Option<Match> {
// The verification procedure is more amenable to standard 64 bit
// values, so get those.
let res64: u64x2 = unsafe { transmute(res) };
let reshi = res64.extract(0);
let reslo = res64.extract(1);
if let Some(m) = self.verify_64(haystack, pos, reshi, 0) {
return Some(m);
}
if let Some(m) = self.verify_64(haystack, pos, reslo, 8) {
return Some(m);
}
None
}

/// Runs the verification procedure on half of `C`.
///
/// If a match exists, it returns the first one.
///
/// `offset` is an additional byte offset to add to the position before
/// substring match verification.
#[inline(always)]
fn verify_64(
&self,
haystack: &[u8],
pos: usize,
mut res: u64,
offset: usize,
) -> Option<Match> {
// There's a possible match so long as there's at least one bit set.
while res != 0 {
// The next possible match is at the least significant bit.
let bit = res.trailing_zeros();
// The position of the bit in its corresponding lane gives us the
// corresponding bucket.
let bucket = (bit % 8) as usize;
// The lane that the bit is in gives us its offset.
let bytei = (bit / 8) as usize;
// Compute the start of where a substring would start.
let start = pos + offset + bytei;
// Kill off this bit. If we couldn't match anything, we'll go to
// the next bit.
res &= !(1 << bit);
// Actual substring search verification.
if let Some(m) = self.verify_bucket(haystack, bucket, start) {
return Some(m);
while bitfield != 0 {
// The next offset, relative to pos, where some fingerprint matched.
let byte_pos = bitfield.trailing_zeros();
bitfield &= !(1 << byte_pos);

// Offset relative to the beginning of the haystack.
let start = pos + byte_pos as usize;

// The bitfield telling us which patterns had fingerprints that match at this starting
// position.
let mut patterns = res.extract(byte_pos);
while patterns != 0 {
let bucket = patterns.trailing_zeros() as usize;
patterns &= !(1 << bucket);

// Actual substring search verification.
if let Some(m) = self.verify_bucket(haystack, bucket, start) {
return Some(m);
}
}
}

None
}

Expand Down

0 comments on commit 12cb63b

Please sign in to comment.