From 258c0623e1e4e49a8d6ac3471dda3dd4030a2191 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 1 Sep 2021 17:26:19 -0400 Subject: [PATCH 01/17] Extract Single-Segment Variant of ZSTD_dfast --- lib/compress/zstd_double_fast.c | 162 +++++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index c17367859e..ee902e19e9 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -47,6 +47,160 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, } +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_singleSegment_generic"); + + /* init */ + ip += ((ip - prefixLowest) == 0); + { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check noDict repcode */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } + } + + /* if no long +1 match, explore the short match we found */ + { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_doubleFast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -323,13 +477,13 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 7); } } From 1bdf0410713b25a5c3419e12f32ab16eac62ba86 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 1 Sep 2021 17:50:53 -0400 Subject: [PATCH 02/17] Track Step Rather than Recalculating (+0.5% Speed) --- lib/compress/zstd_double_fast.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index ee902e19e9..73a8e08ebf 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -70,6 +70,9 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; + size_t step = 1; + const size_t kStepIncr = 1 << kSearchStrength; + const BYTE* nextStep = ip + kStepIncr; DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_singleSegment_generic"); @@ -121,7 +124,14 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } } - ip += ((ip-anchor) >> kSearchStrength) + 1; + if (ip >= nextStep) { + PREFETCH_L1(ip + 64); + PREFETCH_L1(ip + 128); + step++; + nextStep += kStepIncr; + } + ip += step; + #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif @@ -190,6 +200,9 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( anchor = ip; continue; /* faster when present ... (?) */ } } + + step = 1; + nextStep = ip + kStepIncr; } /* while (ip < ilimit) */ /* save reps for next block */ From 072ffaad67b67ea4aef50b7370268fe28fa3b7e1 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 2 Sep 2021 12:03:49 -0400 Subject: [PATCH 03/17] Extract Working Variables --- lib/compress/zstd_double_fast.c | 70 ++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 73a8e08ebf..c3694426c0 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -74,6 +74,21 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const size_t kStepIncr = 1 << kSearchStrength; const BYTE* nextStep = ip + kStepIncr; + size_t hl0; + size_t hs0; + size_t hl1; + // size_t hs1; + + U32 idxl0; + U32 idxs0; + U32 idxl1; + // U32 idxs0; + + const BYTE* matchl0; + const BYTE* matchs0; + const BYTE* matchl1; + // const BYTE* matchs1; + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_singleSegment_generic"); /* init */ @@ -90,14 +105,15 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mLength; U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); U32 const curr = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + idxl0 = hashLong[hl0]; + idxs0 = hashSmall[hs0]; + matchl0 = base + idxl0; + matchs0 = base + idxs0; + + hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ /* check noDict repcode */ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { @@ -107,19 +123,19 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( goto _match_stored; } - if (matchIndexL > prefixLowestIndex) { + if (idxl0 > prefixLowestIndex) { /* check prefix long match */ - if (MEM_read64(matchLong) == MEM_read64(ip)) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + if (MEM_read64(matchl0) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; + offset = (U32)(ip-matchl0); + while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ goto _match_found; } } - if (matchIndexS > prefixLowestIndex) { + if (idxs0 > prefixLowestIndex) { /* check prefix short match */ - if (MEM_read32(match) == MEM_read32(ip)) { + if (MEM_read32(matchs0) == MEM_read32(ip)) { goto _search_next_long; } } @@ -139,18 +155,18 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( _search_next_long: - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = curr + 1; + { hl1 = ZSTD_hashPtr(ip+1, hBitsL, 8); + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + hashLong[hl1] = curr + 1; /* check prefix long +1 match */ - if (matchIndexL3 > prefixLowestIndex) { - if (MEM_read64(matchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchl1+8, iend) + 8; ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ goto _match_found; } } @@ -158,14 +174,14 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( /* if no long +1 match, explore the short match we found */ { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip - match); - while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ } /* fall-through */ -_match_found: +_match_found: /* requires ip, offset, mLength */ offset_2 = offset_1; offset_1 = offset; From a1ac7205d031a7f27ad1d5eae9deced4bba938f1 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 2 Sep 2021 12:15:58 -0400 Subject: [PATCH 04/17] Pull Match Found Stuff Out of the Loop --- lib/compress/zstd_double_fast.c | 157 +++++++++++++++++--------------- 1 file changed, 85 insertions(+), 72 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index c3694426c0..0cf2d2159f 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -60,7 +60,6 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const U32 hBitsS = cParams->chainLog; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); /* presumes that, if there is a dictionary, it must be using Attach mode */ @@ -70,9 +69,14 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; - size_t step = 1; + + size_t mLength; + U32 offset; + U32 curr; + const size_t kStepIncr = 1 << kSearchStrength; - const BYTE* nextStep = ip + kStepIncr; + const BYTE* nextStep; + size_t step; size_t hl0; size_t hs0; @@ -89,23 +93,34 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const BYTE* matchl1; // const BYTE* matchs1; + const BYTE* ip = istart; + const BYTE* ip1; + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_singleSegment_generic"); /* init */ ip += ((ip - prefixLowest) == 0); { - U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } +_start: + + step = 1; + nextStep = ip + kStepIncr; + ip1 = ip + step; + + if (ip1 >= ilimit) { + goto _cleanup; + } + /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - U32 offset; - U32 const curr = (U32)(ip-base); + do { + curr = (U32)(ip-base); hl0 = ZSTD_hashPtr(ip, hBitsL, 8); hs0 = ZSTD_hashPtr(ip, hBitsS, mls); idxl0 = hashLong[hl0]; @@ -151,82 +166,80 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif - continue; + } while (ip < ilimit); -_search_next_long: +_cleanup: + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; - { hl1 = ZSTD_hashPtr(ip+1, hBitsL, 8); - idxl1 = hashLong[hl1]; - matchl1 = base + idxl1; - hashLong[hl1] = curr + 1; + /* Return the last literals size */ + return (size_t)(iend - anchor); - /* check prefix long +1 match */ - if (idxl1 > prefixLowestIndex) { - if (MEM_read64(matchl1) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchl1+8, iend) + 8; - ip++; - offset = (U32)(ip-matchl1); - while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ - goto _match_found; - } +_search_next_long: + { hl1 = ZSTD_hashPtr(ip+1, hBitsL, 8); + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + hashLong[hl1] = curr + 1; + + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchl1+8, iend) + 8; + ip++; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; } } + } - /* if no long +1 match, explore the short match we found */ - { - mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; - offset = (U32)(ip - matchs0); - while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ - } + /* if no long +1 match, explore the short match we found */ + { + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ + } - /* fall-through */ + /* fall-through */ _match_found: /* requires ip, offset, mLength */ - offset_2 = offset_1; - offset_1 = offset; + offset_2 = offset_1; + offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = curr+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } - - step = 1; - nextStep = ip + kStepIncr; - } /* while (ip < ilimit) */ + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } - /* Return the last literals size */ - return (size_t)(iend - anchor); + goto _start; } From db4e1b5479c1df569732aa8f486633ed4098bd8c Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 2 Sep 2021 12:25:08 -0400 Subject: [PATCH 05/17] Hash Long One Position Ahead (+2.5% Speed) Aside from maybe a latency win in the loop, this means that when we find a short match, we've already done the hash we need to check the next long match. --- lib/compress/zstd_double_fast.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 0cf2d2159f..4e38e4662e 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -118,10 +118,11 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( goto _cleanup; } + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + /* Main Search Loop */ do { curr = (U32)(ip-base); - hl0 = ZSTD_hashPtr(ip, hBitsL, 8); hs0 = ZSTD_hashPtr(ip, hBitsS, mls); idxl0 = hashLong[hl0]; idxs0 = hashSmall[hs0]; @@ -148,6 +149,8 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } } + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + if (idxs0 > prefixLowestIndex) { /* check prefix short match */ if (MEM_read32(matchs0) == MEM_read32(ip)) { @@ -155,18 +158,20 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } } - if (ip >= nextStep) { - PREFETCH_L1(ip + 64); - PREFETCH_L1(ip + 128); + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); step++; nextStep += kStepIncr; } - ip += step; + ip = ip1; + ip1 += step; + hl0 = hl1; #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif - } while (ip < ilimit); + } while (ip1 < ilimit); _cleanup: /* save reps for next block */ @@ -177,8 +182,7 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( return (size_t)(iend - anchor); _search_next_long: - { hl1 = ZSTD_hashPtr(ip+1, hBitsL, 8); - idxl1 = hashLong[hl1]; + { idxl1 = hashLong[hl1]; matchl1 = base + idxl1; hashLong[hl1] = curr + 1; From 39f2491bfc70db883d45265af0f764d1e80cfa69 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 8 Sep 2021 12:41:15 -0400 Subject: [PATCH 06/17] Use Look-Ahead Hash for Next Long Check after Short Match (+0.5% Speed) This costs a little ratio, unfortunately. --- lib/compress/zstd_double_fast.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 4e38e4662e..45770b886b 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -139,6 +139,8 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( goto _match_stored; } + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + if (idxl0 > prefixLowestIndex) { /* check prefix long match */ if (MEM_read64(matchl0) == MEM_read64(ip)) { @@ -149,8 +151,6 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } } - hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); - if (idxs0 > prefixLowestIndex) { /* check prefix short match */ if (MEM_read32(matchs0) == MEM_read32(ip)) { @@ -184,13 +184,12 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( _search_next_long: { idxl1 = hashLong[hl1]; matchl1 = base + idxl1; - hashLong[hl1] = curr + 1; /* check prefix long +1 match */ if (idxl1 > prefixLowestIndex) { - if (MEM_read64(matchl1) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchl1+8, iend) + 8; - ip++; + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; offset = (U32)(ip-matchl1); while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ goto _match_found; @@ -205,6 +204,17 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ } + if (step < 4) { + /* It is unsafe to write this value back to the hashtable when ip1 is + * greater than or equal to the new ip we will have after we're done + * processing this match. Rather than perform that test directly + * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler + * more predictable test. The minmatch even if we take a short match is + * 4 bytes, so as long as step, the distance between ip and ip1 + * (initially) is less than 4, we know ip1 < new ip. */ + hashLong[hl1] = (U32)(ip1 - base); + } + /* fall-through */ _match_found: /* requires ip, offset, mLength */ From 2ddef7c872c123383aa8b998a3b428bf359a8ce0 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 8 Sep 2021 12:45:42 -0400 Subject: [PATCH 07/17] Write Back Advanced Hash in Long Matches as Well (+Ratio) Since we're now hashing the position ahead even if we find a long match and don't search that next position, we can write it back into the hashtable even in long matches. This seems to cost us no speed, and improves compression ratio slightly! --- lib/compress/zstd_double_fast.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 45770b886b..2e310e3223 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -204,6 +204,12 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ } + /* fall-through */ + +_match_found: /* requires ip, offset, mLength */ + offset_2 = offset_1; + offset_1 = offset; + if (step < 4) { /* It is unsafe to write this value back to the hashtable when ip1 is * greater than or equal to the new ip we will have after we're done @@ -215,12 +221,6 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( hashLong[hl1] = (U32)(ip1 - base); } - /* fall-through */ - -_match_found: /* requires ip, offset, mLength */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: From 6ae44c0db8d9832dd6d1ac8cdd3435dd686c0d0a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 8 Sep 2021 16:15:01 -0400 Subject: [PATCH 08/17] Advance Long Index Lookup (+0.5% Speed) This lookup can be advanced to before the short match check because either way we will use it (in the next loop iter or in `_search_next_long`). --- lib/compress/zstd_double_fast.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 2e310e3223..4ac44894b3 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -81,17 +81,14 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( size_t hl0; size_t hs0; size_t hl1; - // size_t hs1; U32 idxl0; U32 idxs0; U32 idxl1; - // U32 idxs0; const BYTE* matchl0; const BYTE* matchs0; const BYTE* matchl1; - // const BYTE* matchs1; const BYTE* ip = istart; const BYTE* ip1; @@ -119,14 +116,14 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + idxl0 = hashLong[hl0]; + matchl0 = base + idxl0; /* Main Search Loop */ do { curr = (U32)(ip-base); hs0 = ZSTD_hashPtr(ip, hBitsS, mls); - idxl0 = hashLong[hl0]; idxs0 = hashSmall[hs0]; - matchl0 = base + idxl0; matchs0 = base + idxs0; hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ @@ -151,6 +148,9 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( } } + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + if (idxs0 > prefixLowestIndex) { /* check prefix short match */ if (MEM_read32(matchs0) == MEM_read32(ip)) { @@ -168,6 +168,8 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( ip1 += step; hl0 = hl1; + idxl0 = idxl1; + matchl0 = matchl1; #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif @@ -182,8 +184,7 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( return (size_t)(iend - anchor); _search_next_long: - { idxl1 = hashLong[hl1]; - matchl1 = base + idxl1; + { /* check prefix long +1 match */ if (idxl1 > prefixLowestIndex) { From 2cdfad538c24e8fd108e1c46101f4e7ec645663c Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 8 Sep 2021 16:41:43 -0400 Subject: [PATCH 09/17] Search One Last Position --- lib/compress/zstd_double_fast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 4ac44894b3..a85d68f01c 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -111,7 +111,7 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( nextStep = ip + kStepIncr; ip1 = ip + step; - if (ip1 >= ilimit) { + if (ip1 > ilimit) { goto _cleanup; } @@ -173,7 +173,7 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif - } while (ip1 < ilimit); + } while (ip1 <= ilimit); _cleanup: /* save reps for next block */ From 47fd762eccb3d8273a8e9c20239fd069271c3d0d Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 8 Sep 2021 16:47:26 -0400 Subject: [PATCH 10/17] Nit: Unnest Blocks that Don't Declare Anything --- lib/compress/zstd_double_fast.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index a85d68f01c..3a579e76dc 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -184,26 +184,22 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( return (size_t)(iend - anchor); _search_next_long: - { - /* check prefix long +1 match */ - if (idxl1 > prefixLowestIndex) { - if (MEM_read64(matchl1) == MEM_read64(ip1)) { - ip = ip1; - mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; - offset = (U32)(ip-matchl1); - while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ - goto _match_found; - } + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; } } /* if no long +1 match, explore the short match we found */ - { - mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; - offset = (U32)(ip - matchs0); - while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ - } + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ /* fall-through */ From fcab4841aa0df69de0dfe867f9a74510ad7a459c Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 9 Sep 2021 16:39:29 -0400 Subject: [PATCH 11/17] Nit: Rename Function --- lib/compress/zstd_double_fast.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 3a579e76dc..8a8e8263d5 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -48,10 +48,9 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( +size_t ZSTD_compressBlock_doubleFast_noDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) + void const* src, size_t srcSize, U32 const mls /* template */) { ZSTD_compressionParameters const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; @@ -93,7 +92,7 @@ size_t ZSTD_compressBlock_doubleFast_singleSegment_generic( const BYTE* ip = istart; const BYTE* ip1; - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_singleSegment_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); /* init */ ip += ((ip - prefixLowest) == 0); @@ -530,13 +529,13 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_doubleFast_singleSegment_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 7); } } From 051b473e7ebce26f0e2ea7d2ced994f2ec2bfb59 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 9 Sep 2021 16:42:13 -0400 Subject: [PATCH 12/17] Fall Back in _extDict to New _noDict Rather than Old Merged Impl --- lib/compress/zstd_double_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 8a8e8263d5..380214c578 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -591,7 +591,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, mls); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ From 62536ef7da6023a1a0d533351b402b5418c4199a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 9 Sep 2021 17:01:31 -0400 Subject: [PATCH 13/17] Simplify DMS Implementation by Removing noDict Support --- lib/compress/zstd_double_fast.c | 151 +++++++++++--------------------- 1 file changed, 50 insertions(+), 101 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 380214c578..c58e5cd3f1 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -254,10 +254,10 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( +size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) + U32 const mls /* template */) { ZSTD_compressionParameters const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; @@ -278,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic( U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? - dictCParams->chainLog : hBitsS; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams; + const U32* const dictHashLong = dms->hashTable; + const U32* const dictHashSmall = dms->chainTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); + const U32 dictHBitsL = dictCParams->hashLog; + const U32 dictHBitsS = dictCParams->chainLog; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); /* init */ ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } + + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -341,15 +317,13 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; const U32 repIndex = curr + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? + const BYTE* repMatch = (repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + /* check repcode */ + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; @@ -358,15 +332,6 @@ size_t ZSTD_compressBlock_doubleFast_generic( goto _match_stored; } - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - if (matchIndexL > prefixLowestIndex) { /* check prefix long match */ if (MEM_read64(matchLong) == MEM_read64(ip)) { @@ -375,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState long match */ U32 const dictMatchIndexL = dictHashLong[dictHL]; const BYTE* dictMatchL = dictBase + dictMatchIndexL; @@ -393,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (MEM_read32(match) == MEM_read32(ip)) { goto _search_next_long; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState short match */ U32 const dictMatchIndexS = dictHashSmall[dictHS]; match = dictBase + dictMatchIndexS; @@ -426,7 +391,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dict long +1 match */ U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; @@ -440,7 +405,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( } } } /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + if (matchIndexS < prefixLowestIndex) { mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; offset = (U32)(curr - matchIndexS); while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ @@ -472,43 +437,27 @@ size_t ZSTD_compressBlock_doubleFast_generic( } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; anchor = ip; - continue; /* faster when present ... (?) */ - } } } + continue; + } + break; + } + } } /* while (ip < ilimit) */ /* save reps for next block */ @@ -549,13 +498,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); } } From c2c32839dc8cc16ea8a75c2f552018623def8464 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 9 Sep 2021 17:17:46 -0400 Subject: [PATCH 14/17] Update results.csv --- tests/regression/results.csv | 448 +++++++++++++++++------------------ 1 file changed, 224 insertions(+), 224 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 2538728c84..72ac1f27be 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -2,10 +2,10 @@ Data, Config, Method, silesia.tar, level -5, compress simple, 7359401 silesia.tar, level -3, compress simple, 6901672 silesia.tar, level -1, compress simple, 6182241 -silesia.tar, level 0, compress simple, 4861424 +silesia.tar, level 0, compress simple, 4854086 silesia.tar, level 1, compress simple, 5331946 -silesia.tar, level 3, compress simple, 4861424 -silesia.tar, level 4, compress simple, 4799632 +silesia.tar, level 3, compress simple, 4854086 +silesia.tar, level 4, compress simple, 4791503 silesia.tar, level 5, compress simple, 4649987 silesia.tar, level 6, compress simple, 4616797 silesia.tar, level 7, compress simple, 4576661 @@ -13,16 +13,16 @@ silesia.tar, level 9, compress silesia.tar, level 13, compress simple, 4502956 silesia.tar, level 16, compress simple, 4360527 silesia.tar, level 19, compress simple, 4267266 -silesia.tar, uncompressed literals, compress simple, 4861424 +silesia.tar, uncompressed literals, compress simple, 4854086 silesia.tar, uncompressed literals optimal, compress simple, 4267266 silesia.tar, huffman literals, compress simple, 6182241 github.tar, level -5, compress simple, 66914 github.tar, level -3, compress simple, 52127 github.tar, level -1, compress simple, 42560 -github.tar, level 0, compress simple, 38441 +github.tar, level 0, compress simple, 38831 github.tar, level 1, compress simple, 39200 -github.tar, level 3, compress simple, 38441 -github.tar, level 4, compress simple, 38467 +github.tar, level 3, compress simple, 38831 +github.tar, level 4, compress simple, 38893 github.tar, level 5, compress simple, 38366 github.tar, level 6, compress simple, 38648 github.tar, level 7, compress simple, 38110 @@ -30,16 +30,16 @@ github.tar, level 9, compress github.tar, level 13, compress simple, 35501 github.tar, level 16, compress simple, 40471 github.tar, level 19, compress simple, 32134 -github.tar, uncompressed literals, compress simple, 38441 +github.tar, uncompressed literals, compress simple, 38831 github.tar, uncompressed literals optimal, compress simple, 32134 github.tar, huffman literals, compress simple, 42560 silesia, level -5, compress cctx, 7354675 silesia, level -3, compress cctx, 6902374 silesia, level -1, compress cctx, 6177565 -silesia, level 0, compress cctx, 4849553 +silesia, level 0, compress cctx, 4842075 silesia, level 1, compress cctx, 5309098 -silesia, level 3, compress cctx, 4849553 -silesia, level 4, compress cctx, 4786968 +silesia, level 3, compress cctx, 4842075 +silesia, level 4, compress cctx, 4779186 silesia, level 5, compress cctx, 4638691 silesia, level 6, compress cctx, 4605296 silesia, level 7, compress cctx, 4566984 @@ -47,28 +47,28 @@ silesia, level 9, compress silesia, level 13, compress cctx, 4493990 silesia, level 16, compress cctx, 4359864 silesia, level 19, compress cctx, 4296880 -silesia, long distance mode, compress cctx, 4849553 -silesia, multithreaded, compress cctx, 4849553 -silesia, multithreaded long distance mode, compress cctx, 4849553 -silesia, small window log, compress cctx, 7084179 +silesia, long distance mode, compress cctx, 4842075 +silesia, multithreaded, compress cctx, 4842075 +silesia, multithreaded long distance mode, compress cctx, 4842075 +silesia, small window log, compress cctx, 7082951 silesia, small hash log, compress cctx, 6526141 silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794052 -silesia, uncompressed literals, compress cctx, 4849553 +silesia, uncompressed literals, compress cctx, 4842075 silesia, uncompressed literals optimal, compress cctx, 4296880 silesia, huffman literals, compress cctx, 6177565 -silesia, multithreaded with advanced params, compress cctx, 4849553 +silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 232315 github, level -5 with dict, compress cctx, 47294 github, level -3, compress cctx, 220760 github, level -3 with dict, compress cctx, 48047 github, level -1, compress cctx, 175468 github, level -1 with dict, compress cctx, 43527 -github, level 0, compress cctx, 136335 +github, level 0, compress cctx, 136332 github, level 0 with dict, compress cctx, 41534 github, level 1, compress cctx, 142365 github, level 1 with dict, compress cctx, 42157 -github, level 3, compress cctx, 136335 +github, level 3, compress cctx, 136332 github, level 3 with dict, compress cctx, 41534 github, level 4, compress cctx, 136199 github, level 4 with dict, compress cctx, 41725 @@ -86,24 +86,24 @@ github, level 16, compress github, level 16 with dict, compress cctx, 37568 github, level 19, compress cctx, 134064 github, level 19 with dict, compress cctx, 37567 -github, long distance mode, compress cctx, 141102 -github, multithreaded, compress cctx, 141102 -github, multithreaded long distance mode, compress cctx, 141102 -github, small window log, compress cctx, 141102 +github, long distance mode, compress cctx, 141069 +github, multithreaded, compress cctx, 141069 +github, multithreaded long distance mode, compress cctx, 141069 +github, small window log, compress cctx, 141069 github, small hash log, compress cctx, 138949 github, small chain log, compress cctx, 139242 github, explicit params, compress cctx, 140932 -github, uncompressed literals, compress cctx, 136335 +github, uncompressed literals, compress cctx, 136332 github, uncompressed literals optimal, compress cctx, 134064 github, huffman literals, compress cctx, 175468 -github, multithreaded with advanced params, compress cctx, 141102 +github, multithreaded with advanced params, compress cctx, 141069 silesia, level -5, zstdcli, 7354723 silesia, level -3, zstdcli, 6902422 silesia, level -1, zstdcli, 6177613 -silesia, level 0, zstdcli, 4849601 +silesia, level 0, zstdcli, 4842123 silesia, level 1, zstdcli, 5309146 -silesia, level 3, zstdcli, 4849601 -silesia, level 4, zstdcli, 4787016 +silesia, level 3, zstdcli, 4842123 +silesia, level 4, zstdcli, 4779234 silesia, level 5, zstdcli, 4638739 silesia, level 6, zstdcli, 4605344 silesia, level 7, zstdcli, 4567032 @@ -111,24 +111,24 @@ silesia, level 9, zstdcli, silesia, level 13, zstdcli, 4494038 silesia, level 16, zstdcli, 4359912 silesia, level 19, zstdcli, 4296928 -silesia, long distance mode, zstdcli, 4840807 -silesia, multithreaded, zstdcli, 4849601 -silesia, multithreaded long distance mode, zstdcli, 4840807 -silesia, small window log, zstdcli, 7095967 +silesia, long distance mode, zstdcli, 4833785 +silesia, multithreaded, zstdcli, 4842123 +silesia, multithreaded long distance mode, zstdcli, 4833785 +silesia, small window log, zstdcli, 7095048 silesia, small hash log, zstdcli, 6526189 silesia, small chain log, zstdcli, 4912245 silesia, explicit params, zstdcli, 4795432 -silesia, uncompressed literals, zstdcli, 5128030 +silesia, uncompressed literals, zstdcli, 5120614 silesia, uncompressed literals optimal, zstdcli, 4319566 silesia, huffman literals, zstdcli, 5326394 -silesia, multithreaded with advanced params, zstdcli, 5128030 +silesia, multithreaded with advanced params, zstdcli, 5120614 silesia.tar, level -5, zstdcli, 7363866 silesia.tar, level -3, zstdcli, 6902158 silesia.tar, level -1, zstdcli, 6182939 -silesia.tar, level 0, zstdcli, 4861512 +silesia.tar, level 0, zstdcli, 4854164 silesia.tar, level 1, zstdcli, 5333183 -silesia.tar, level 3, zstdcli, 4861512 -silesia.tar, level 4, zstdcli, 4800528 +silesia.tar, level 3, zstdcli, 4854164 +silesia.tar, level 4, zstdcli, 4792352 silesia.tar, level 5, zstdcli, 4650946 silesia.tar, level 6, zstdcli, 4618390 silesia.tar, level 7, zstdcli, 4578719 @@ -136,29 +136,29 @@ silesia.tar, level 9, zstdcli, silesia.tar, level 13, zstdcli, 4502960 silesia.tar, level 16, zstdcli, 4360531 silesia.tar, level 19, zstdcli, 4267270 -silesia.tar, no source size, zstdcli, 4861508 -silesia.tar, long distance mode, zstdcli, 4853225 -silesia.tar, multithreaded, zstdcli, 4861512 -silesia.tar, multithreaded long distance mode, zstdcli, 4853225 -silesia.tar, small window log, zstdcli, 7101576 +silesia.tar, no source size, zstdcli, 4854160 +silesia.tar, long distance mode, zstdcli, 4845745 +silesia.tar, multithreaded, zstdcli, 4854164 +silesia.tar, multithreaded long distance mode, zstdcli, 4845745 +silesia.tar, small window log, zstdcli, 7100701 silesia.tar, small hash log, zstdcli, 6529289 silesia.tar, small chain log, zstdcli, 4917022 silesia.tar, explicit params, zstdcli, 4820713 -silesia.tar, uncompressed literals, zstdcli, 5129559 +silesia.tar, uncompressed literals, zstdcli, 5122571 silesia.tar, uncompressed literals optimal, zstdcli, 4310145 silesia.tar, huffman literals, zstdcli, 5344915 -silesia.tar, multithreaded with advanced params, zstdcli, 5129559 +silesia.tar, multithreaded with advanced params, zstdcli, 5122571 github, level -5, zstdcli, 234315 github, level -5 with dict, zstdcli, 48718 github, level -3, zstdcli, 222760 github, level -3 with dict, zstdcli, 47395 github, level -1, zstdcli, 177468 github, level -1 with dict, zstdcli, 45170 -github, level 0, zstdcli, 138335 +github, level 0, zstdcli, 138332 github, level 0 with dict, zstdcli, 43148 github, level 1, zstdcli, 144365 github, level 1 with dict, zstdcli, 43682 -github, level 3, zstdcli, 138335 +github, level 3, zstdcli, 138332 github, level 3 with dict, zstdcli, 43148 github, level 4, zstdcli, 138199 github, level 4 with dict, zstdcli, 43251 @@ -176,30 +176,30 @@ github, level 16, zstdcli, github, level 16 with dict, zstdcli, 39577 github, level 19, zstdcli, 136064 github, level 19 with dict, zstdcli, 39576 -github, long distance mode, zstdcli, 138335 -github, multithreaded, zstdcli, 138335 -github, multithreaded long distance mode, zstdcli, 138335 -github, small window log, zstdcli, 138335 +github, long distance mode, zstdcli, 138332 +github, multithreaded, zstdcli, 138332 +github, multithreaded long distance mode, zstdcli, 138332 +github, small window log, zstdcli, 138332 github, small hash log, zstdcli, 137590 github, small chain log, zstdcli, 138341 github, explicit params, zstdcli, 136197 -github, uncompressed literals, zstdcli, 167915 +github, uncompressed literals, zstdcli, 167911 github, uncompressed literals optimal, zstdcli, 159227 github, huffman literals, zstdcli, 144365 -github, multithreaded with advanced params, zstdcli, 167915 +github, multithreaded with advanced params, zstdcli, 167911 github.tar, level -5, zstdcli, 66918 github.tar, level -5 with dict, zstdcli, 51529 github.tar, level -3, zstdcli, 52131 github.tar, level -3 with dict, zstdcli, 44246 github.tar, level -1, zstdcli, 42564 github.tar, level -1 with dict, zstdcli, 41140 -github.tar, level 0, zstdcli, 38445 +github.tar, level 0, zstdcli, 38835 github.tar, level 0 with dict, zstdcli, 37999 github.tar, level 1, zstdcli, 39204 github.tar, level 1 with dict, zstdcli, 38288 -github.tar, level 3, zstdcli, 38445 +github.tar, level 3, zstdcli, 38835 github.tar, level 3 with dict, zstdcli, 37999 -github.tar, level 4, zstdcli, 38471 +github.tar, level 4, zstdcli, 38897 github.tar, level 4 with dict, zstdcli, 37952 github.tar, level 5, zstdcli, 38370 github.tar, level 5 with dict, zstdcli, 39071 @@ -215,26 +215,26 @@ github.tar, level 16, zstdcli, github.tar, level 16 with dict, zstdcli, 33382 github.tar, level 19, zstdcli, 32138 github.tar, level 19 with dict, zstdcli, 32713 -github.tar, no source size, zstdcli, 38442 +github.tar, no source size, zstdcli, 38832 github.tar, no source size with dict, zstdcli, 38004 -github.tar, long distance mode, zstdcli, 39730 -github.tar, multithreaded, zstdcli, 38445 -github.tar, multithreaded long distance mode, zstdcli, 39730 +github.tar, long distance mode, zstdcli, 40236 +github.tar, multithreaded, zstdcli, 38835 +github.tar, multithreaded long distance mode, zstdcli, 40236 github.tar, small window log, zstdcli, 198544 github.tar, small hash log, zstdcli, 129874 github.tar, small chain log, zstdcli, 41673 github.tar, explicit params, zstdcli, 41385 -github.tar, uncompressed literals, zstdcli, 41126 +github.tar, uncompressed literals, zstdcli, 41529 github.tar, uncompressed literals optimal, zstdcli, 35401 github.tar, huffman literals, zstdcli, 38857 -github.tar, multithreaded with advanced params, zstdcli, 41126 +github.tar, multithreaded with advanced params, zstdcli, 41529 silesia, level -5, advanced one pass, 7354675 silesia, level -3, advanced one pass, 6902374 silesia, level -1, advanced one pass, 6177565 -silesia, level 0, advanced one pass, 4849553 +silesia, level 0, advanced one pass, 4842075 silesia, level 1, advanced one pass, 5309098 -silesia, level 3, advanced one pass, 4849553 -silesia, level 4, advanced one pass, 4786968 +silesia, level 3, advanced one pass, 4842075 +silesia, level 4, advanced one pass, 4779186 silesia, level 5 row 1, advanced one pass, 4638691 silesia, level 5 row 2, advanced one pass, 4640752 silesia, level 5, advanced one pass, 4638691 @@ -250,25 +250,25 @@ silesia, level 12 row 2, advanced silesia, level 13, advanced one pass, 4493990 silesia, level 16, advanced one pass, 4359864 silesia, level 19, advanced one pass, 4296880 -silesia, no source size, advanced one pass, 4849553 -silesia, long distance mode, advanced one pass, 4840737 -silesia, multithreaded, advanced one pass, 4849553 -silesia, multithreaded long distance mode, advanced one pass, 4840759 -silesia, small window log, advanced one pass, 7095919 +silesia, no source size, advanced one pass, 4842075 +silesia, long distance mode, advanced one pass, 4833710 +silesia, multithreaded, advanced one pass, 4842075 +silesia, multithreaded long distance mode, advanced one pass, 4833737 +silesia, small window log, advanced one pass, 7095000 silesia, small hash log, advanced one pass, 6526141 silesia, small chain log, advanced one pass, 4912197 silesia, explicit params, advanced one pass, 4795432 -silesia, uncompressed literals, advanced one pass, 5127982 +silesia, uncompressed literals, advanced one pass, 5120566 silesia, uncompressed literals optimal, advanced one pass, 4319518 silesia, huffman literals, advanced one pass, 5326346 -silesia, multithreaded with advanced params, advanced one pass, 5127982 +silesia, multithreaded with advanced params, advanced one pass, 5120566 silesia.tar, level -5, advanced one pass, 7359401 silesia.tar, level -3, advanced one pass, 6901672 silesia.tar, level -1, advanced one pass, 6182241 -silesia.tar, level 0, advanced one pass, 4861424 +silesia.tar, level 0, advanced one pass, 4854086 silesia.tar, level 1, advanced one pass, 5331946 -silesia.tar, level 3, advanced one pass, 4861424 -silesia.tar, level 4, advanced one pass, 4799632 +silesia.tar, level 3, advanced one pass, 4854086 +silesia.tar, level 4, advanced one pass, 4791503 silesia.tar, level 5 row 1, advanced one pass, 4649987 silesia.tar, level 5 row 2, advanced one pass, 4652862 silesia.tar, level 5, advanced one pass, 4649987 @@ -284,25 +284,25 @@ silesia.tar, level 12 row 2, advanced silesia.tar, level 13, advanced one pass, 4502956 silesia.tar, level 16, advanced one pass, 4360527 silesia.tar, level 19, advanced one pass, 4267266 -silesia.tar, no source size, advanced one pass, 4861424 -silesia.tar, long distance mode, advanced one pass, 4847752 -silesia.tar, multithreaded, advanced one pass, 4861508 -silesia.tar, multithreaded long distance mode, advanced one pass, 4853221 -silesia.tar, small window log, advanced one pass, 7101530 +silesia.tar, no source size, advanced one pass, 4854086 +silesia.tar, long distance mode, advanced one pass, 4840452 +silesia.tar, multithreaded, advanced one pass, 4854160 +silesia.tar, multithreaded long distance mode, advanced one pass, 4845741 +silesia.tar, small window log, advanced one pass, 7100655 silesia.tar, small hash log, advanced one pass, 6529231 silesia.tar, small chain log, advanced one pass, 4917041 silesia.tar, explicit params, advanced one pass, 4806855 -silesia.tar, uncompressed literals, advanced one pass, 5129458 +silesia.tar, uncompressed literals, advanced one pass, 5122473 silesia.tar, uncompressed literals optimal, advanced one pass, 4310141 silesia.tar, huffman literals, advanced one pass, 5344545 -silesia.tar, multithreaded with advanced params, advanced one pass, 5129555 +silesia.tar, multithreaded with advanced params, advanced one pass, 5122567 github, level -5, advanced one pass, 232315 github, level -5 with dict, advanced one pass, 46718 github, level -3, advanced one pass, 220760 github, level -3 with dict, advanced one pass, 45395 github, level -1, advanced one pass, 175468 github, level -1 with dict, advanced one pass, 43170 -github, level 0, advanced one pass, 136335 +github, level 0, advanced one pass, 136332 github, level 0 with dict, advanced one pass, 41148 github, level 0 with dict dms, advanced one pass, 41148 github, level 0 with dict dds, advanced one pass, 41148 @@ -314,7 +314,7 @@ github, level 1 with dict dms, advanced github, level 1 with dict dds, advanced one pass, 41682 github, level 1 with dict copy, advanced one pass, 41674 github, level 1 with dict load, advanced one pass, 43755 -github, level 3, advanced one pass, 136335 +github, level 3, advanced one pass, 136332 github, level 3 with dict, advanced one pass, 41148 github, level 3 with dict dms, advanced one pass, 41148 github, level 3 with dict dds, advanced one pass, 41148 @@ -408,26 +408,26 @@ github, level 19 with dict dms, advanced github, level 19 with dict dds, advanced one pass, 37576 github, level 19 with dict copy, advanced one pass, 37567 github, level 19 with dict load, advanced one pass, 39613 -github, no source size, advanced one pass, 136335 +github, no source size, advanced one pass, 136332 github, no source size with dict, advanced one pass, 41148 -github, long distance mode, advanced one pass, 136335 -github, multithreaded, advanced one pass, 136335 -github, multithreaded long distance mode, advanced one pass, 136335 -github, small window log, advanced one pass, 136335 +github, long distance mode, advanced one pass, 136332 +github, multithreaded, advanced one pass, 136332 +github, multithreaded long distance mode, advanced one pass, 136332 +github, small window log, advanced one pass, 136332 github, small hash log, advanced one pass, 135590 github, small chain log, advanced one pass, 136341 github, explicit params, advanced one pass, 137727 -github, uncompressed literals, advanced one pass, 165915 +github, uncompressed literals, advanced one pass, 165911 github, uncompressed literals optimal, advanced one pass, 157227 github, huffman literals, advanced one pass, 142365 -github, multithreaded with advanced params, advanced one pass, 165915 +github, multithreaded with advanced params, advanced one pass, 165911 github.tar, level -5, advanced one pass, 66914 github.tar, level -5 with dict, advanced one pass, 51525 github.tar, level -3, advanced one pass, 52127 github.tar, level -3 with dict, advanced one pass, 44242 github.tar, level -1, advanced one pass, 42560 github.tar, level -1 with dict, advanced one pass, 41136 -github.tar, level 0, advanced one pass, 38441 +github.tar, level 0, advanced one pass, 38831 github.tar, level 0 with dict, advanced one pass, 37995 github.tar, level 0 with dict dms, advanced one pass, 38003 github.tar, level 0 with dict dds, advanced one pass, 38003 @@ -439,13 +439,13 @@ github.tar, level 1 with dict dms, advanced github.tar, level 1 with dict dds, advanced one pass, 38294 github.tar, level 1 with dict copy, advanced one pass, 38284 github.tar, level 1 with dict load, advanced one pass, 38724 -github.tar, level 3, advanced one pass, 38441 +github.tar, level 3, advanced one pass, 38831 github.tar, level 3 with dict, advanced one pass, 37995 github.tar, level 3 with dict dms, advanced one pass, 38003 github.tar, level 3 with dict dds, advanced one pass, 38003 github.tar, level 3 with dict copy, advanced one pass, 37995 github.tar, level 3 with dict load, advanced one pass, 37956 -github.tar, level 4, advanced one pass, 38467 +github.tar, level 4, advanced one pass, 38893 github.tar, level 4 with dict, advanced one pass, 37948 github.tar, level 4 with dict dms, advanced one pass, 37954 github.tar, level 4 with dict dds, advanced one pass, 37954 @@ -533,26 +533,26 @@ github.tar, level 19 with dict dms, advanced github.tar, level 19 with dict dds, advanced one pass, 32553 github.tar, level 19 with dict copy, advanced one pass, 32709 github.tar, level 19 with dict load, advanced one pass, 32474 -github.tar, no source size, advanced one pass, 38441 +github.tar, no source size, advanced one pass, 38831 github.tar, no source size with dict, advanced one pass, 37995 -github.tar, long distance mode, advanced one pass, 39757 -github.tar, multithreaded, advanced one pass, 38441 -github.tar, multithreaded long distance mode, advanced one pass, 39726 +github.tar, long distance mode, advanced one pass, 40252 +github.tar, multithreaded, advanced one pass, 38831 +github.tar, multithreaded long distance mode, advanced one pass, 40232 github.tar, small window log, advanced one pass, 198540 github.tar, small hash log, advanced one pass, 129870 github.tar, small chain log, advanced one pass, 41669 github.tar, explicit params, advanced one pass, 41385 -github.tar, uncompressed literals, advanced one pass, 41122 +github.tar, uncompressed literals, advanced one pass, 41525 github.tar, uncompressed literals optimal, advanced one pass, 35397 github.tar, huffman literals, advanced one pass, 38853 -github.tar, multithreaded with advanced params, advanced one pass, 41122 +github.tar, multithreaded with advanced params, advanced one pass, 41525 silesia, level -5, advanced one pass small out, 7354675 silesia, level -3, advanced one pass small out, 6902374 silesia, level -1, advanced one pass small out, 6177565 -silesia, level 0, advanced one pass small out, 4849553 +silesia, level 0, advanced one pass small out, 4842075 silesia, level 1, advanced one pass small out, 5309098 -silesia, level 3, advanced one pass small out, 4849553 -silesia, level 4, advanced one pass small out, 4786968 +silesia, level 3, advanced one pass small out, 4842075 +silesia, level 4, advanced one pass small out, 4779186 silesia, level 5 row 1, advanced one pass small out, 4638691 silesia, level 5 row 2, advanced one pass small out, 4640752 silesia, level 5, advanced one pass small out, 4638691 @@ -568,25 +568,25 @@ silesia, level 12 row 2, advanced silesia, level 13, advanced one pass small out, 4493990 silesia, level 16, advanced one pass small out, 4359864 silesia, level 19, advanced one pass small out, 4296880 -silesia, no source size, advanced one pass small out, 4849553 -silesia, long distance mode, advanced one pass small out, 4840737 -silesia, multithreaded, advanced one pass small out, 4849553 -silesia, multithreaded long distance mode, advanced one pass small out, 4840759 -silesia, small window log, advanced one pass small out, 7095919 +silesia, no source size, advanced one pass small out, 4842075 +silesia, long distance mode, advanced one pass small out, 4833710 +silesia, multithreaded, advanced one pass small out, 4842075 +silesia, multithreaded long distance mode, advanced one pass small out, 4833737 +silesia, small window log, advanced one pass small out, 7095000 silesia, small hash log, advanced one pass small out, 6526141 silesia, small chain log, advanced one pass small out, 4912197 silesia, explicit params, advanced one pass small out, 4795432 -silesia, uncompressed literals, advanced one pass small out, 5127982 +silesia, uncompressed literals, advanced one pass small out, 5120566 silesia, uncompressed literals optimal, advanced one pass small out, 4319518 silesia, huffman literals, advanced one pass small out, 5326346 -silesia, multithreaded with advanced params, advanced one pass small out, 5127982 +silesia, multithreaded with advanced params, advanced one pass small out, 5120566 silesia.tar, level -5, advanced one pass small out, 7359401 silesia.tar, level -3, advanced one pass small out, 6901672 silesia.tar, level -1, advanced one pass small out, 6182241 -silesia.tar, level 0, advanced one pass small out, 4861424 +silesia.tar, level 0, advanced one pass small out, 4854086 silesia.tar, level 1, advanced one pass small out, 5331946 -silesia.tar, level 3, advanced one pass small out, 4861424 -silesia.tar, level 4, advanced one pass small out, 4799632 +silesia.tar, level 3, advanced one pass small out, 4854086 +silesia.tar, level 4, advanced one pass small out, 4791503 silesia.tar, level 5 row 1, advanced one pass small out, 4649987 silesia.tar, level 5 row 2, advanced one pass small out, 4652862 silesia.tar, level 5, advanced one pass small out, 4649987 @@ -602,25 +602,25 @@ silesia.tar, level 12 row 2, advanced silesia.tar, level 13, advanced one pass small out, 4502956 silesia.tar, level 16, advanced one pass small out, 4360527 silesia.tar, level 19, advanced one pass small out, 4267266 -silesia.tar, no source size, advanced one pass small out, 4861424 -silesia.tar, long distance mode, advanced one pass small out, 4847752 -silesia.tar, multithreaded, advanced one pass small out, 4861508 -silesia.tar, multithreaded long distance mode, advanced one pass small out, 4853221 -silesia.tar, small window log, advanced one pass small out, 7101530 +silesia.tar, no source size, advanced one pass small out, 4854086 +silesia.tar, long distance mode, advanced one pass small out, 4840452 +silesia.tar, multithreaded, advanced one pass small out, 4854160 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4845741 +silesia.tar, small window log, advanced one pass small out, 7100655 silesia.tar, small hash log, advanced one pass small out, 6529231 silesia.tar, small chain log, advanced one pass small out, 4917041 silesia.tar, explicit params, advanced one pass small out, 4806855 -silesia.tar, uncompressed literals, advanced one pass small out, 5129458 +silesia.tar, uncompressed literals, advanced one pass small out, 5122473 silesia.tar, uncompressed literals optimal, advanced one pass small out, 4310141 silesia.tar, huffman literals, advanced one pass small out, 5344545 -silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129555 +silesia.tar, multithreaded with advanced params, advanced one pass small out, 5122567 github, level -5, advanced one pass small out, 232315 github, level -5 with dict, advanced one pass small out, 46718 github, level -3, advanced one pass small out, 220760 github, level -3 with dict, advanced one pass small out, 45395 github, level -1, advanced one pass small out, 175468 github, level -1 with dict, advanced one pass small out, 43170 -github, level 0, advanced one pass small out, 136335 +github, level 0, advanced one pass small out, 136332 github, level 0 with dict, advanced one pass small out, 41148 github, level 0 with dict dms, advanced one pass small out, 41148 github, level 0 with dict dds, advanced one pass small out, 41148 @@ -632,7 +632,7 @@ github, level 1 with dict dms, advanced github, level 1 with dict dds, advanced one pass small out, 41682 github, level 1 with dict copy, advanced one pass small out, 41674 github, level 1 with dict load, advanced one pass small out, 43755 -github, level 3, advanced one pass small out, 136335 +github, level 3, advanced one pass small out, 136332 github, level 3 with dict, advanced one pass small out, 41148 github, level 3 with dict dms, advanced one pass small out, 41148 github, level 3 with dict dds, advanced one pass small out, 41148 @@ -726,26 +726,26 @@ github, level 19 with dict dms, advanced github, level 19 with dict dds, advanced one pass small out, 37576 github, level 19 with dict copy, advanced one pass small out, 37567 github, level 19 with dict load, advanced one pass small out, 39613 -github, no source size, advanced one pass small out, 136335 +github, no source size, advanced one pass small out, 136332 github, no source size with dict, advanced one pass small out, 41148 -github, long distance mode, advanced one pass small out, 136335 -github, multithreaded, advanced one pass small out, 136335 -github, multithreaded long distance mode, advanced one pass small out, 136335 -github, small window log, advanced one pass small out, 136335 +github, long distance mode, advanced one pass small out, 136332 +github, multithreaded, advanced one pass small out, 136332 +github, multithreaded long distance mode, advanced one pass small out, 136332 +github, small window log, advanced one pass small out, 136332 github, small hash log, advanced one pass small out, 135590 github, small chain log, advanced one pass small out, 136341 github, explicit params, advanced one pass small out, 137727 -github, uncompressed literals, advanced one pass small out, 165915 +github, uncompressed literals, advanced one pass small out, 165911 github, uncompressed literals optimal, advanced one pass small out, 157227 github, huffman literals, advanced one pass small out, 142365 -github, multithreaded with advanced params, advanced one pass small out, 165915 +github, multithreaded with advanced params, advanced one pass small out, 165911 github.tar, level -5, advanced one pass small out, 66914 github.tar, level -5 with dict, advanced one pass small out, 51525 github.tar, level -3, advanced one pass small out, 52127 github.tar, level -3 with dict, advanced one pass small out, 44242 github.tar, level -1, advanced one pass small out, 42560 github.tar, level -1 with dict, advanced one pass small out, 41136 -github.tar, level 0, advanced one pass small out, 38441 +github.tar, level 0, advanced one pass small out, 38831 github.tar, level 0 with dict, advanced one pass small out, 37995 github.tar, level 0 with dict dms, advanced one pass small out, 38003 github.tar, level 0 with dict dds, advanced one pass small out, 38003 @@ -757,13 +757,13 @@ github.tar, level 1 with dict dms, advanced github.tar, level 1 with dict dds, advanced one pass small out, 38294 github.tar, level 1 with dict copy, advanced one pass small out, 38284 github.tar, level 1 with dict load, advanced one pass small out, 38724 -github.tar, level 3, advanced one pass small out, 38441 +github.tar, level 3, advanced one pass small out, 38831 github.tar, level 3 with dict, advanced one pass small out, 37995 github.tar, level 3 with dict dms, advanced one pass small out, 38003 github.tar, level 3 with dict dds, advanced one pass small out, 38003 github.tar, level 3 with dict copy, advanced one pass small out, 37995 github.tar, level 3 with dict load, advanced one pass small out, 37956 -github.tar, level 4, advanced one pass small out, 38467 +github.tar, level 4, advanced one pass small out, 38893 github.tar, level 4 with dict, advanced one pass small out, 37948 github.tar, level 4 with dict dms, advanced one pass small out, 37954 github.tar, level 4 with dict dds, advanced one pass small out, 37954 @@ -851,26 +851,26 @@ github.tar, level 19 with dict dms, advanced github.tar, level 19 with dict dds, advanced one pass small out, 32553 github.tar, level 19 with dict copy, advanced one pass small out, 32709 github.tar, level 19 with dict load, advanced one pass small out, 32474 -github.tar, no source size, advanced one pass small out, 38441 +github.tar, no source size, advanced one pass small out, 38831 github.tar, no source size with dict, advanced one pass small out, 37995 -github.tar, long distance mode, advanced one pass small out, 39757 -github.tar, multithreaded, advanced one pass small out, 38441 -github.tar, multithreaded long distance mode, advanced one pass small out, 39726 +github.tar, long distance mode, advanced one pass small out, 40252 +github.tar, multithreaded, advanced one pass small out, 38831 +github.tar, multithreaded long distance mode, advanced one pass small out, 40232 github.tar, small window log, advanced one pass small out, 198540 github.tar, small hash log, advanced one pass small out, 129870 github.tar, small chain log, advanced one pass small out, 41669 github.tar, explicit params, advanced one pass small out, 41385 -github.tar, uncompressed literals, advanced one pass small out, 41122 +github.tar, uncompressed literals, advanced one pass small out, 41525 github.tar, uncompressed literals optimal, advanced one pass small out, 35397 github.tar, huffman literals, advanced one pass small out, 38853 -github.tar, multithreaded with advanced params, advanced one pass small out, 41122 +github.tar, multithreaded with advanced params, advanced one pass small out, 41525 silesia, level -5, advanced streaming, 7292053 silesia, level -3, advanced streaming, 6867875 silesia, level -1, advanced streaming, 6183923 -silesia, level 0, advanced streaming, 4849553 +silesia, level 0, advanced streaming, 4842075 silesia, level 1, advanced streaming, 5312694 -silesia, level 3, advanced streaming, 4849553 -silesia, level 4, advanced streaming, 4786968 +silesia, level 3, advanced streaming, 4842075 +silesia, level 4, advanced streaming, 4779186 silesia, level 5 row 1, advanced streaming, 4638691 silesia, level 5 row 2, advanced streaming, 4640752 silesia, level 5, advanced streaming, 4638691 @@ -886,25 +886,25 @@ silesia, level 12 row 2, advanced silesia, level 13, advanced streaming, 4493990 silesia, level 16, advanced streaming, 4359864 silesia, level 19, advanced streaming, 4296880 -silesia, no source size, advanced streaming, 4849517 -silesia, long distance mode, advanced streaming, 4840737 -silesia, multithreaded, advanced streaming, 4849553 -silesia, multithreaded long distance mode, advanced streaming, 4840759 -silesia, small window log, advanced streaming, 7112062 +silesia, no source size, advanced streaming, 4842039 +silesia, long distance mode, advanced streaming, 4833710 +silesia, multithreaded, advanced streaming, 4842075 +silesia, multithreaded long distance mode, advanced streaming, 4833737 +silesia, small window log, advanced streaming, 7111103 silesia, small hash log, advanced streaming, 6526141 silesia, small chain log, advanced streaming, 4912197 silesia, explicit params, advanced streaming, 4795452 -silesia, uncompressed literals, advanced streaming, 5127982 +silesia, uncompressed literals, advanced streaming, 5120566 silesia, uncompressed literals optimal, advanced streaming, 4319518 silesia, huffman literals, advanced streaming, 5332234 -silesia, multithreaded with advanced params, advanced streaming, 5127982 +silesia, multithreaded with advanced params, advanced streaming, 5120566 silesia.tar, level -5, advanced streaming, 7260007 silesia.tar, level -3, advanced streaming, 6845151 silesia.tar, level -1, advanced streaming, 6187938 -silesia.tar, level 0, advanced streaming, 4861426 +silesia.tar, level 0, advanced streaming, 4859271 silesia.tar, level 1, advanced streaming, 5334890 -silesia.tar, level 3, advanced streaming, 4861426 -silesia.tar, level 4, advanced streaming, 4799632 +silesia.tar, level 3, advanced streaming, 4859271 +silesia.tar, level 4, advanced streaming, 4797470 silesia.tar, level 5 row 1, advanced streaming, 4649992 silesia.tar, level 5 row 2, advanced streaming, 4652866 silesia.tar, level 5, advanced streaming, 4649992 @@ -920,25 +920,25 @@ silesia.tar, level 12 row 2, advanced silesia.tar, level 13, advanced streaming, 4502956 silesia.tar, level 16, advanced streaming, 4360527 silesia.tar, level 19, advanced streaming, 4267266 -silesia.tar, no source size, advanced streaming, 4861422 -silesia.tar, long distance mode, advanced streaming, 4847752 -silesia.tar, multithreaded, advanced streaming, 4861508 -silesia.tar, multithreaded long distance mode, advanced streaming, 4853221 -silesia.tar, small window log, advanced streaming, 7118769 +silesia.tar, no source size, advanced streaming, 4859267 +silesia.tar, long distance mode, advanced streaming, 4840452 +silesia.tar, multithreaded, advanced streaming, 4854160 +silesia.tar, multithreaded long distance mode, advanced streaming, 4845741 +silesia.tar, small window log, advanced streaming, 7117559 silesia.tar, small hash log, advanced streaming, 6529234 silesia.tar, small chain log, advanced streaming, 4917021 silesia.tar, explicit params, advanced streaming, 4806873 -silesia.tar, uncompressed literals, advanced streaming, 5129461 +silesia.tar, uncompressed literals, advanced streaming, 5127423 silesia.tar, uncompressed literals optimal, advanced streaming, 4310141 silesia.tar, huffman literals, advanced streaming, 5350519 -silesia.tar, multithreaded with advanced params, advanced streaming, 5129555 +silesia.tar, multithreaded with advanced params, advanced streaming, 5122567 github, level -5, advanced streaming, 232315 github, level -5 with dict, advanced streaming, 46718 github, level -3, advanced streaming, 220760 github, level -3 with dict, advanced streaming, 45395 github, level -1, advanced streaming, 175468 github, level -1 with dict, advanced streaming, 43170 -github, level 0, advanced streaming, 136335 +github, level 0, advanced streaming, 136332 github, level 0 with dict, advanced streaming, 41148 github, level 0 with dict dms, advanced streaming, 41148 github, level 0 with dict dds, advanced streaming, 41148 @@ -950,7 +950,7 @@ github, level 1 with dict dms, advanced github, level 1 with dict dds, advanced streaming, 41682 github, level 1 with dict copy, advanced streaming, 41674 github, level 1 with dict load, advanced streaming, 43755 -github, level 3, advanced streaming, 136335 +github, level 3, advanced streaming, 136332 github, level 3 with dict, advanced streaming, 41148 github, level 3 with dict dms, advanced streaming, 41148 github, level 3 with dict dds, advanced streaming, 41148 @@ -1044,26 +1044,26 @@ github, level 19 with dict dms, advanced github, level 19 with dict dds, advanced streaming, 37576 github, level 19 with dict copy, advanced streaming, 37567 github, level 19 with dict load, advanced streaming, 39613 -github, no source size, advanced streaming, 136335 +github, no source size, advanced streaming, 136332 github, no source size with dict, advanced streaming, 41148 -github, long distance mode, advanced streaming, 136335 -github, multithreaded, advanced streaming, 136335 -github, multithreaded long distance mode, advanced streaming, 136335 -github, small window log, advanced streaming, 136335 +github, long distance mode, advanced streaming, 136332 +github, multithreaded, advanced streaming, 136332 +github, multithreaded long distance mode, advanced streaming, 136332 +github, small window log, advanced streaming, 136332 github, small hash log, advanced streaming, 135590 github, small chain log, advanced streaming, 136341 github, explicit params, advanced streaming, 137727 -github, uncompressed literals, advanced streaming, 165915 +github, uncompressed literals, advanced streaming, 165911 github, uncompressed literals optimal, advanced streaming, 157227 github, huffman literals, advanced streaming, 142365 -github, multithreaded with advanced params, advanced streaming, 165915 +github, multithreaded with advanced params, advanced streaming, 165911 github.tar, level -5, advanced streaming, 64132 github.tar, level -5 with dict, advanced streaming, 48642 github.tar, level -3, advanced streaming, 50964 github.tar, level -3 with dict, advanced streaming, 42750 github.tar, level -1, advanced streaming, 42536 github.tar, level -1 with dict, advanced streaming, 41198 -github.tar, level 0, advanced streaming, 38441 +github.tar, level 0, advanced streaming, 38831 github.tar, level 0 with dict, advanced streaming, 37995 github.tar, level 0 with dict dms, advanced streaming, 38003 github.tar, level 0 with dict dds, advanced streaming, 38003 @@ -1075,13 +1075,13 @@ github.tar, level 1 with dict dms, advanced github.tar, level 1 with dict dds, advanced streaming, 38326 github.tar, level 1 with dict copy, advanced streaming, 38316 github.tar, level 1 with dict load, advanced streaming, 38761 -github.tar, level 3, advanced streaming, 38441 +github.tar, level 3, advanced streaming, 38831 github.tar, level 3 with dict, advanced streaming, 37995 github.tar, level 3 with dict dms, advanced streaming, 38003 github.tar, level 3 with dict dds, advanced streaming, 38003 github.tar, level 3 with dict copy, advanced streaming, 37995 github.tar, level 3 with dict load, advanced streaming, 37956 -github.tar, level 4, advanced streaming, 38467 +github.tar, level 4, advanced streaming, 38893 github.tar, level 4 with dict, advanced streaming, 37948 github.tar, level 4 with dict dms, advanced streaming, 37954 github.tar, level 4 with dict dds, advanced streaming, 37954 @@ -1169,26 +1169,26 @@ github.tar, level 19 with dict dms, advanced github.tar, level 19 with dict dds, advanced streaming, 32553 github.tar, level 19 with dict copy, advanced streaming, 32709 github.tar, level 19 with dict load, advanced streaming, 32474 -github.tar, no source size, advanced streaming, 38438 +github.tar, no source size, advanced streaming, 38828 github.tar, no source size with dict, advanced streaming, 38000 -github.tar, long distance mode, advanced streaming, 39757 -github.tar, multithreaded, advanced streaming, 38441 -github.tar, multithreaded long distance mode, advanced streaming, 39726 +github.tar, long distance mode, advanced streaming, 40252 +github.tar, multithreaded, advanced streaming, 38831 +github.tar, multithreaded long distance mode, advanced streaming, 40232 github.tar, small window log, advanced streaming, 199558 github.tar, small hash log, advanced streaming, 129870 github.tar, small chain log, advanced streaming, 41669 github.tar, explicit params, advanced streaming, 41385 -github.tar, uncompressed literals, advanced streaming, 41122 +github.tar, uncompressed literals, advanced streaming, 41525 github.tar, uncompressed literals optimal, advanced streaming, 35397 github.tar, huffman literals, advanced streaming, 38874 -github.tar, multithreaded with advanced params, advanced streaming, 41122 +github.tar, multithreaded with advanced params, advanced streaming, 41525 silesia, level -5, old streaming, 7292053 silesia, level -3, old streaming, 6867875 silesia, level -1, old streaming, 6183923 -silesia, level 0, old streaming, 4849553 +silesia, level 0, old streaming, 4842075 silesia, level 1, old streaming, 5312694 -silesia, level 3, old streaming, 4849553 -silesia, level 4, old streaming, 4786968 +silesia, level 3, old streaming, 4842075 +silesia, level 4, old streaming, 4779186 silesia, level 5, old streaming, 4638691 silesia, level 6, old streaming, 4605296 silesia, level 7, old streaming, 4566984 @@ -1196,17 +1196,17 @@ silesia, level 9, old stre silesia, level 13, old streaming, 4493990 silesia, level 16, old streaming, 4359864 silesia, level 19, old streaming, 4296880 -silesia, no source size, old streaming, 4849517 -silesia, uncompressed literals, old streaming, 4849553 +silesia, no source size, old streaming, 4842039 +silesia, uncompressed literals, old streaming, 4842075 silesia, uncompressed literals optimal, old streaming, 4296880 silesia, huffman literals, old streaming, 6183923 silesia.tar, level -5, old streaming, 7260007 silesia.tar, level -3, old streaming, 6845151 silesia.tar, level -1, old streaming, 6187938 -silesia.tar, level 0, old streaming, 4861426 +silesia.tar, level 0, old streaming, 4859271 silesia.tar, level 1, old streaming, 5334890 -silesia.tar, level 3, old streaming, 4861426 -silesia.tar, level 4, old streaming, 4799632 +silesia.tar, level 3, old streaming, 4859271 +silesia.tar, level 4, old streaming, 4797470 silesia.tar, level 5, old streaming, 4649992 silesia.tar, level 6, old streaming, 4616803 silesia.tar, level 7, old streaming, 4576664 @@ -1214,8 +1214,8 @@ silesia.tar, level 9, old stre silesia.tar, level 13, old streaming, 4502956 silesia.tar, level 16, old streaming, 4360527 silesia.tar, level 19, old streaming, 4267266 -silesia.tar, no source size, old streaming, 4861422 -silesia.tar, uncompressed literals, old streaming, 4861426 +silesia.tar, no source size, old streaming, 4859267 +silesia.tar, uncompressed literals, old streaming, 4859271 silesia.tar, uncompressed literals optimal, old streaming, 4267266 silesia.tar, huffman literals, old streaming, 6187938 github, level -5, old streaming, 232315 @@ -1224,11 +1224,11 @@ github, level -3, old stre github, level -3 with dict, old streaming, 45395 github, level -1, old streaming, 175468 github, level -1 with dict, old streaming, 43170 -github, level 0, old streaming, 136335 +github, level 0, old streaming, 136332 github, level 0 with dict, old streaming, 41148 github, level 1, old streaming, 142365 github, level 1 with dict, old streaming, 41682 -github, level 3, old streaming, 136335 +github, level 3, old streaming, 136332 github, level 3 with dict, old streaming, 41148 github, level 4, old streaming, 136199 github, level 4 with dict, old streaming, 41251 @@ -1246,9 +1246,9 @@ github, level 16, old stre github, level 16 with dict, old streaming, 37577 github, level 19, old streaming, 134064 github, level 19 with dict, old streaming, 37576 -github, no source size, old streaming, 140632 +github, no source size, old streaming, 140599 github, no source size with dict, old streaming, 40654 -github, uncompressed literals, old streaming, 136335 +github, uncompressed literals, old streaming, 136332 github, uncompressed literals optimal, old streaming, 134064 github, huffman literals, old streaming, 175468 github.tar, level -5, old streaming, 64132 @@ -1257,13 +1257,13 @@ github.tar, level -3, old stre github.tar, level -3 with dict, old streaming, 42750 github.tar, level -1, old streaming, 42536 github.tar, level -1 with dict, old streaming, 41198 -github.tar, level 0, old streaming, 38441 +github.tar, level 0, old streaming, 38831 github.tar, level 0 with dict, old streaming, 37995 github.tar, level 1, old streaming, 39270 github.tar, level 1 with dict, old streaming, 38316 -github.tar, level 3, old streaming, 38441 +github.tar, level 3, old streaming, 38831 github.tar, level 3 with dict, old streaming, 37995 -github.tar, level 4, old streaming, 38467 +github.tar, level 4, old streaming, 38893 github.tar, level 4 with dict, old streaming, 37948 github.tar, level 5, old streaming, 38366 github.tar, level 5 with dict, old streaming, 39082 @@ -1279,18 +1279,18 @@ github.tar, level 16, old stre github.tar, level 16 with dict, old streaming, 33378 github.tar, level 19, old streaming, 32134 github.tar, level 19 with dict, old streaming, 32709 -github.tar, no source size, old streaming, 38438 +github.tar, no source size, old streaming, 38828 github.tar, no source size with dict, old streaming, 38000 -github.tar, uncompressed literals, old streaming, 38441 +github.tar, uncompressed literals, old streaming, 38831 github.tar, uncompressed literals optimal, old streaming, 32134 github.tar, huffman literals, old streaming, 42536 silesia, level -5, old streaming advanced, 7292053 silesia, level -3, old streaming advanced, 6867875 silesia, level -1, old streaming advanced, 6183923 -silesia, level 0, old streaming advanced, 4849553 +silesia, level 0, old streaming advanced, 4842075 silesia, level 1, old streaming advanced, 5312694 -silesia, level 3, old streaming advanced, 4849553 -silesia, level 4, old streaming advanced, 4786968 +silesia, level 3, old streaming advanced, 4842075 +silesia, level 4, old streaming advanced, 4779186 silesia, level 5, old streaming advanced, 4638691 silesia, level 6, old streaming advanced, 4605296 silesia, level 7, old streaming advanced, 4566984 @@ -1298,25 +1298,25 @@ silesia, level 9, old stre silesia, level 13, old streaming advanced, 4493990 silesia, level 16, old streaming advanced, 4359864 silesia, level 19, old streaming advanced, 4296880 -silesia, no source size, old streaming advanced, 4849517 -silesia, long distance mode, old streaming advanced, 4849553 -silesia, multithreaded, old streaming advanced, 4849553 -silesia, multithreaded long distance mode, old streaming advanced, 4849553 -silesia, small window log, old streaming advanced, 7112062 +silesia, no source size, old streaming advanced, 4842039 +silesia, long distance mode, old streaming advanced, 4842075 +silesia, multithreaded, old streaming advanced, 4842075 +silesia, multithreaded long distance mode, old streaming advanced, 4842075 +silesia, small window log, old streaming advanced, 7111103 silesia, small hash log, old streaming advanced, 6526141 silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795452 -silesia, uncompressed literals, old streaming advanced, 4849553 +silesia, uncompressed literals, old streaming advanced, 4842075 silesia, uncompressed literals optimal, old streaming advanced, 4296880 silesia, huffman literals, old streaming advanced, 6183923 -silesia, multithreaded with advanced params, old streaming advanced, 4849553 +silesia, multithreaded with advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 7260007 silesia.tar, level -3, old streaming advanced, 6845151 silesia.tar, level -1, old streaming advanced, 6187938 -silesia.tar, level 0, old streaming advanced, 4861426 +silesia.tar, level 0, old streaming advanced, 4859271 silesia.tar, level 1, old streaming advanced, 5334890 -silesia.tar, level 3, old streaming advanced, 4861426 -silesia.tar, level 4, old streaming advanced, 4799632 +silesia.tar, level 3, old streaming advanced, 4859271 +silesia.tar, level 4, old streaming advanced, 4797470 silesia.tar, level 5, old streaming advanced, 4649992 silesia.tar, level 6, old streaming advanced, 4616803 silesia.tar, level 7, old streaming advanced, 4576664 @@ -1324,18 +1324,18 @@ silesia.tar, level 9, old stre silesia.tar, level 13, old streaming advanced, 4502956 silesia.tar, level 16, old streaming advanced, 4360527 silesia.tar, level 19, old streaming advanced, 4267266 -silesia.tar, no source size, old streaming advanced, 4861422 -silesia.tar, long distance mode, old streaming advanced, 4861426 -silesia.tar, multithreaded, old streaming advanced, 4861426 -silesia.tar, multithreaded long distance mode, old streaming advanced, 4861426 -silesia.tar, small window log, old streaming advanced, 7118772 +silesia.tar, no source size, old streaming advanced, 4859267 +silesia.tar, long distance mode, old streaming advanced, 4859271 +silesia.tar, multithreaded, old streaming advanced, 4859271 +silesia.tar, multithreaded long distance mode, old streaming advanced, 4859271 +silesia.tar, small window log, old streaming advanced, 7117562 silesia.tar, small hash log, old streaming advanced, 6529234 silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4806873 -silesia.tar, uncompressed literals, old streaming advanced, 4861426 +silesia.tar, uncompressed literals, old streaming advanced, 4859271 silesia.tar, uncompressed literals optimal, old streaming advanced, 4267266 silesia.tar, huffman literals, old streaming advanced, 6187938 -silesia.tar, multithreaded with advanced params, old streaming advanced, 4861426 +silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 241214 github, level -5 with dict, old streaming advanced, 49562 github, level -3, old streaming advanced, 222937 @@ -1364,7 +1364,7 @@ github, level 16, old stre github, level 16 with dict, old streaming advanced, 40789 github, level 19, old streaming advanced, 134064 github, level 19 with dict, old streaming advanced, 37576 -github, no source size, old streaming advanced, 140632 +github, no source size, old streaming advanced, 140599 github, no source size with dict, old streaming advanced, 40608 github, long distance mode, old streaming advanced, 141104 github, multithreaded, old streaming advanced, 141104 @@ -1383,13 +1383,13 @@ github.tar, level -3, old stre github.tar, level -3 with dict, old streaming advanced, 43357 github.tar, level -1, old streaming advanced, 42536 github.tar, level -1 with dict, old streaming advanced, 41494 -github.tar, level 0, old streaming advanced, 38441 +github.tar, level 0, old streaming advanced, 38831 github.tar, level 0 with dict, old streaming advanced, 38013 github.tar, level 1, old streaming advanced, 39270 github.tar, level 1 with dict, old streaming advanced, 38934 -github.tar, level 3, old streaming advanced, 38441 +github.tar, level 3, old streaming advanced, 38831 github.tar, level 3 with dict, old streaming advanced, 38013 -github.tar, level 4, old streaming advanced, 38467 +github.tar, level 4, old streaming advanced, 38893 github.tar, level 4 with dict, old streaming advanced, 38063 github.tar, level 5, old streaming advanced, 38366 github.tar, level 5 with dict, old streaming advanced, 37728 @@ -1405,19 +1405,19 @@ github.tar, level 16, old stre github.tar, level 16 with dict, old streaming advanced, 38578 github.tar, level 19, old streaming advanced, 32134 github.tar, level 19 with dict, old streaming advanced, 32702 -github.tar, no source size, old streaming advanced, 38438 +github.tar, no source size, old streaming advanced, 38828 github.tar, no source size with dict, old streaming advanced, 38015 -github.tar, long distance mode, old streaming advanced, 38441 -github.tar, multithreaded, old streaming advanced, 38441 -github.tar, multithreaded long distance mode, old streaming advanced, 38441 +github.tar, long distance mode, old streaming advanced, 38831 +github.tar, multithreaded, old streaming advanced, 38831 +github.tar, multithreaded long distance mode, old streaming advanced, 38831 github.tar, small window log, old streaming advanced, 199561 github.tar, small hash log, old streaming advanced, 129870 github.tar, small chain log, old streaming advanced, 41669 github.tar, explicit params, old streaming advanced, 41385 -github.tar, uncompressed literals, old streaming advanced, 38441 +github.tar, uncompressed literals, old streaming advanced, 38831 github.tar, uncompressed literals optimal, old streaming advanced, 32134 github.tar, huffman literals, old streaming advanced, 42536 -github.tar, multithreaded with advanced params, old streaming advanced, 38441 +github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming cdict, 46718 github, level -3 with dict, old streaming cdict, 45395 github, level -1 with dict, old streaming cdict, 43170 From 168d0a3c89dd7d74fc682859520e895b6f1b521a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 13 Sep 2021 16:35:58 -0400 Subject: [PATCH 15/17] Fix Flaky Test This test depended on `_extDict` and `_noDict` compressing identically, which is not a guarantee we make, AFAIK. --- tests/zstreamtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index bbef903f89..093850e10a 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -906,7 +906,7 @@ static int basicUnitTests(U32 seed, double compressibility) in.pos = 0; in.size = CNBufferSize - in.size; CHECK(!(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) == 0), "Not finished"); - CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize)); + CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, out.pos)); DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableOutBuffer modify buffer : ", testNb++); From 79ca83076620d69a419e167b2a49ca01557285f8 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 5 Oct 2021 14:53:45 -0400 Subject: [PATCH 16/17] Style: Add Comments to Variables and Move a Couple into the Loop --- lib/compress/zstd_double_fast.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index c58e5cd3f1..db95fe634a 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -73,24 +73,24 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( U32 offset; U32 curr; + /* how many positions to search before increasing step size */ const size_t kStepIncr = 1 << kSearchStrength; + /* the position at which to increment the step size if no match is found */ const BYTE* nextStep; - size_t step; + size_t step; /* the current step size */ - size_t hl0; - size_t hs0; - size_t hl1; + size_t hl0; /* the long hash at ip */ + size_t hl1; /* the long hash at ip1 */ - U32 idxl0; - U32 idxs0; - U32 idxl1; + U32 idxl0; /* the long match index for ip */ + U32 idxl1; /* the long match index for ip1 */ - const BYTE* matchl0; - const BYTE* matchs0; - const BYTE* matchl1; + const BYTE* matchl0; /* the long match for ip */ + const BYTE* matchs0; /* the short match for ip */ + const BYTE* matchl1; /* the long match for ip1 */ - const BYTE* ip = istart; - const BYTE* ip1; + const BYTE* ip = istart; /* the current position */ + const BYTE* ip1; /* the next position */ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); @@ -120,9 +120,9 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( /* Main Search Loop */ do { + const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 idxs0 = hashSmall[hs0]; curr = (U32)(ip-base); - hs0 = ZSTD_hashPtr(ip, hBitsS, mls); - idxs0 = hashSmall[hs0]; matchs0 = base + idxs0; hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ From 0bfc935add6f47630c28b4e9215026e5860cb85e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 11 Oct 2021 15:57:29 -0400 Subject: [PATCH 17/17] Convert Outer Control Structure to Loop --- lib/compress/zstd_double_fast.c | 242 ++++++++++++++++---------------- 1 file changed, 121 insertions(+), 121 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index db95fe634a..9d19468cba 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -104,152 +104,152 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } -_start: - - step = 1; - nextStep = ip + kStepIncr; - ip1 = ip + step; - - if (ip1 > ilimit) { - goto _cleanup; - } - - hl0 = ZSTD_hashPtr(ip, hBitsL, 8); - idxl0 = hashLong[hl0]; - matchl0 = base + idxl0; - - /* Main Search Loop */ - do { - const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 idxs0 = hashSmall[hs0]; - curr = (U32)(ip-base); - matchs0 = base + idxs0; - - hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ - - /* check noDict repcode */ - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; + /* Outer Loop: one iteration per match found and stored */ + while (1) { + step = 1; + nextStep = ip + kStepIncr; + ip1 = ip + step; + + if (ip1 > ilimit) { + goto _cleanup; } - hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + idxl0 = hashLong[hl0]; + matchl0 = base + idxl0; + + /* Inner Loop: one iteration per search / position */ + do { + const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 idxs0 = hashSmall[hs0]; + curr = (U32)(ip-base); + matchs0 = base + idxs0; + + hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ + + /* check noDict repcode */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); - if (idxl0 > prefixLowestIndex) { - /* check prefix long match */ - if (MEM_read64(matchl0) == MEM_read64(ip)) { - mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; - offset = (U32)(ip-matchl0); - while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ - goto _match_found; + if (idxl0 > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchl0) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; + offset = (U32)(ip-matchl0); + while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ + goto _match_found; + } } - } - idxl1 = hashLong[hl1]; - matchl1 = base + idxl1; + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; - if (idxs0 > prefixLowestIndex) { - /* check prefix short match */ - if (MEM_read32(matchs0) == MEM_read32(ip)) { - goto _search_next_long; + if (idxs0 > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(matchs0) == MEM_read32(ip)) { + goto _search_next_long; + } } - } - if (ip1 >= nextStep) { - PREFETCH_L1(ip1 + 64); - PREFETCH_L1(ip1 + 128); - step++; - nextStep += kStepIncr; - } - ip = ip1; - ip1 += step; + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + step++; + nextStep += kStepIncr; + } + ip = ip1; + ip1 += step; - hl0 = hl1; - idxl0 = idxl1; - matchl0 = matchl1; -#if defined(__aarch64__) - PREFETCH_L1(ip+256); -#endif - } while (ip1 <= ilimit); + hl0 = hl1; + idxl0 = idxl1; + matchl0 = matchl1; + #if defined(__aarch64__) + PREFETCH_L1(ip+256); + #endif + } while (ip1 <= ilimit); _cleanup: - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; - /* Return the last literals size */ - return (size_t)(iend - anchor); + /* Return the last literals size */ + return (size_t)(iend - anchor); _search_next_long: - /* check prefix long +1 match */ - if (idxl1 > prefixLowestIndex) { - if (MEM_read64(matchl1) == MEM_read64(ip1)) { - ip = ip1; - mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; - offset = (U32)(ip-matchl1); - while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ - goto _match_found; + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; + } } - } - /* if no long +1 match, explore the short match we found */ - mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; - offset = (U32)(ip - matchs0); - while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ + /* if no long +1 match, explore the short match we found */ + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ - /* fall-through */ + /* fall-through */ _match_found: /* requires ip, offset, mLength */ - offset_2 = offset_1; - offset_1 = offset; - - if (step < 4) { - /* It is unsafe to write this value back to the hashtable when ip1 is - * greater than or equal to the new ip we will have after we're done - * processing this match. Rather than perform that test directly - * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler - * more predictable test. The minmatch even if we take a short match is - * 4 bytes, so as long as step, the distance between ip and ip1 - * (initially) is less than 4, we know ip1 < new ip. */ - hashLong[hl1] = (U32)(ip1 - base); - } + offset_2 = offset_1; + offset_1 = offset; + + if (step < 4) { + /* It is unsafe to write this value back to the hashtable when ip1 is + * greater than or equal to the new ip we will have after we're done + * processing this match. Rather than perform that test directly + * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler + * more predictable test. The minmatch even if we take a short match is + * 4 bytes, so as long as step, the distance between ip and ip1 + * (initially) is less than 4, we know ip1 < new ip. */ + hashLong[hl1] = (U32)(ip1 - base); + } - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = curr+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } + /* match found */ + ip += mLength; + anchor = ip; - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } - goto _start; + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + } + } }