Improves performance for small data streaming by removing the costly tag space initialization.

Add salting to the hash to reduce collisions when reusing the hash table across multiple compressions.
Salting the hash ensures that hashes from previous compressions won't match hashes of similar data in the current compression.
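
To illustrate the idea (a toy sketch, not the committed code): XORing a per-compression salt into a multiplicative hash sends identical bytes to different buckets in each session, so entries left over in a reused table stop looking like valid candidates. The multiplier below is the prime4bytes constant from lib/compress/zstd_compress_internal.h; everything else is hypothetical.

    #include <stdint.h>
    #include <stdio.h>

    /* Toy version of the salted hash shape used by this commit:
     * multiply by a large odd prime, XOR in the salt, keep the top hBits. */
    static uint32_t toy_salted_hash4(uint32_t u, uint32_t hBits, uint32_t salt)
    {
        return ((u * 2654435761U) ^ salt) >> (32 - hBits);
    }

    int main(void)
    {
        uint32_t const bytes = 0x64636261U; /* "abcd" read little-endian */
        /* Same input, two different salts: the bucket computed during a
         * previous compression no longer matches in the next one. */
        printf("salt A -> bucket %u\n", toy_salted_hash4(bytes, 16, 0x11111111U));
        printf("salt B -> bucket %u\n", toy_salted_hash4(bytes, 16, 0x22222222U));
        return 0;
    }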
yoniko committed Jan 13, 2023
1 parent fba704f commit 6055bef
Showing 3 changed files with 69 additions and 30 deletions.
12 changes: 11 additions & 1 deletion lib/compress/zstd_compress.c
@@ -1809,7 +1809,17 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
{ /* Row match finder needs an additional table of hashes ("tags") */
size_t const tagTableSize = hSize*sizeof(U16);
ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
ms->hashSalt = (ms->hashSalt + 12345) * 1103515245; // ISO C PRNG
#if ZSTD_MEMORY_SANITIZER
/* The rowHash algorithm is able to work with uninitialized tag space.
 * It doesn't matter whether the tag space starts out correct or not:
 * at worst it will incorrectly "hint" at a match position,
 * which is then properly filtered out by indices analysis.
 * There is also no requirement to start the series of tags at position "0".
 */
__msan_unpoison(ms->tagTable, tagTableSize);
#endif

}
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
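The salt is refreshed on every match-state reset by one step of a linear congruential generator. The constants are the ones from the ISO C rand() example, though the add and the multiply are applied in the opposite order here; either way the recurrence walks through distinct 32-bit salts. A small sketch of the sequence, under that reading:

    #include <stdint.h>
    #include <stdio.h>

    /* One LCG step per reset, mirroring the hunk above:
     * salt' = (salt + 12345) * 1103515245 (mod 2^32). */
    int main(void)
    {
        uint32_t salt = 0;
        for (int reset = 1; reset <= 4; ++reset) {
            salt = (salt + 12345U) * 1103515245U;
            printf("reset %d: salt = 0x%08x\n", reset, salt);
        }
        return 0;
    }
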
49 changes: 37 additions & 12 deletions lib/compress/zstd_compress_internal.h
@@ -221,6 +221,7 @@ struct ZSTD_matchState_t {
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U32 hashSalt; /* For row-based matchFinder: salts the hash so a reused tag table won't produce stale matches */

U32* hashTable;
U32* hashTable3;
@@ -754,28 +755,34 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
static size_t ZSTD_hash5(U64 u, U32 h, U32 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U32 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static size_t ZSTD_hash6(U64 u, U32 h, U32 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U32 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
static size_t ZSTD_hash7(U64 u, U32 h, U32 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U32 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
static size_t ZSTD_hash8(U64 u, U32 h, U32 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U32 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@@ -795,6 +802,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U32 hashSalt) {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);

switch(mls)
{
default:
case 4: return ZSTD_hash4PtrS(p, hBits, hashSalt);
case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
}
}


/** ZSTD_ipow() :
* Return base^exponent.
*/
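For context on how the salted hash is consumed by the row matchfinder below: each lookup requests hashLog + ZSTD_ROW_HASH_TAG_BITS bits, uses the high bits to select a row in the hash table, and keeps the low bits as a one-byte tag stored in the tag table. A hedged sketch of that split, assuming ZSTD_ROW_HASH_TAG_BITS is 8 and reusing the toy hash from above:

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_TAG_BITS 8U                      /* stands in for ZSTD_ROW_HASH_TAG_BITS */
    #define TOY_TAG_MASK ((1U << TOY_TAG_BITS) - 1)

    static uint32_t toy_salted_hash4(uint32_t u, uint32_t hBits, uint32_t salt)
    {
        return ((u * 2654435761U) ^ salt) >> (32 - hBits);
    }

    int main(void)
    {
        uint32_t const hashLog = 17;             /* bits that pick the row */
        uint32_t const rowLog  = 4;              /* 16 entries per row */
        uint32_t const hash    = toy_salted_hash4(0x64636261U, hashLog + TOY_TAG_BITS, 0xDEADBEEFU);
        uint32_t const relRow  = (hash >> TOY_TAG_BITS) << rowLog; /* start of the row */
        uint32_t const tag     = hash & TOY_TAG_MASK;              /* 1-byte tag kept per entry */
        printf("relRow = %u, tag = 0x%02x\n", relRow, tag);
        return 0;
    }
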
38 changes: 21 additions & 17 deletions lib/compress/zstd_lazy.c
@@ -842,7 +842,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta
*/
FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
U32 const rowLog, U32 const mls,
U32 idx, const BYTE* const iLimit)
U32 idx, const BYTE* const iLimit, U32 const hashSalt)
{
U32 const* const hashTable = ms->hashTable;
U16 const* const tagTable = ms->tagTable;
@@ -851,7 +851,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);

for (; idx < lim; ++idx) {
U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -869,9 +869,10 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
U16 const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls)
U32 const rowLog, U32 const mls,
U32 const hashSalt)
{
U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
{ U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -886,7 +887,8 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 updateStartIdx, U32 const updateEndIdx,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
U32 const rowMask, U32 const useCache,
U32 const hashSalt)
{
U32* const hashTable = ms->hashTable;
U16* const tagTable = ms->tagTable;
@@ -895,15 +897,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,

DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
: (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, hashSalt)
: (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32* const row = hashTable + relRow;
BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
                                              The explicit cast lets us reach the exact desired position within each row. */
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);

assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt));
((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
row[pos] = updateStartIdx;
}
@@ -915,7 +917,8 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
*/
FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
U32 const rowMask, U32 const useCache,
U32 const hashSalt)
{
U32 idx = ms->nextToUpdate;
const BYTE* const base = ms->window.base;
@@ -932,13 +935,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const
*/
if (UNLIKELY(target - idx > kSkipThreshold)) {
U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache, hashSalt);
idx = target - kMaxMatchEndPositionsToUpdate;
ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1, hashSalt);
}
}
assert(target >= idx);
ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache, hashSalt);
ms->nextToUpdate = target;
}

@@ -952,7 +955,7 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);

DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */, ms->hashSalt);
}

/* Returns the mask width of bits group of which will be set to 1. Given not all
@@ -1164,6 +1167,7 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowMask = rowEntries - 1;
const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
const U32 hashSalt = ms->hashSalt;
U32 nbAttempts = 1U << cappedSearchLog;
size_t ml=4-1;

@@ -1199,9 +1203,9 @@
}

/* Update the hashTable and tagTable up to (but not including) ip */
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */, hashSalt);
{ /* Get the hash for ip, compute the appropriate row */
U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
U32* const row = hashTable + relRow;
@@ -1548,7 +1552,7 @@ ZSTD_compressBlock_lazy_generic(
if (searchMethod == search_rowHash) {
ZSTD_row_fillHashCache(ms, base, rowLog,
MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
ms->nextToUpdate, ilimit);
ms->nextToUpdate, ilimit, ms->hashSalt);
}

/* Match Loop */
@@ -1918,7 +1922,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
if (searchMethod == search_rowHash) {
ZSTD_row_fillHashCache(ms, base, rowLog,
MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
ms->nextToUpdate, ilimit);
ms->nextToUpdate, ilimit, ms->hashSalt);
}

/* Match Loop */
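Finally, a simplified model of the hash-cache rotation that the fillHashCache/nextCachedHash changes thread the salt through: the cache is a small ring of ZSTD_ROW_HASH_CACHE_SIZE precomputed hashes, and each lookup returns the hash for the current position while refilling that slot with the hash for the position ZSTD_ROW_HASH_CACHE_SIZE bytes ahead. All names below are hypothetical and the hash is a stand-in:

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_CACHE_SIZE 8U                    /* stands in for ZSTD_ROW_HASH_CACHE_SIZE */
    #define TOY_CACHE_MASK (TOY_CACHE_SIZE - 1)

    /* Stand-in for ZSTD_hashPtrSalted(base + idx, ...). */
    static uint32_t toy_hash_at(const uint8_t* base, uint32_t idx)
    {
        return (base[idx] * 2654435761U) >> 16;
    }

    /* Return the cached hash for idx and refill the slot with the hash for
     * idx + TOY_CACHE_SIZE, so the slot is ready when idx wraps around. */
    static uint32_t toy_next_cached_hash(uint32_t* cache, const uint8_t* base, uint32_t idx)
    {
        uint32_t const newHash = toy_hash_at(base, idx + TOY_CACHE_SIZE);
        uint32_t const hash    = cache[idx & TOY_CACHE_MASK];
        cache[idx & TOY_CACHE_MASK] = newHash;
        return hash;
    }

    int main(void)
    {
        uint8_t  data[64];
        uint32_t cache[TOY_CACHE_SIZE];
        uint32_t idx;
        for (idx = 0; idx < 64; ++idx) data[idx] = (uint8_t)idx;
        /* Fill phase, as in ZSTD_row_fillHashCache. */
        for (idx = 0; idx < TOY_CACHE_SIZE; ++idx)
            cache[idx & TOY_CACHE_MASK] = toy_hash_at(data, idx);
        /* Consume phase, as in ZSTD_row_nextCachedHash. */
        for (idx = 0; idx < 4; ++idx)
            printf("idx %u -> hash %u\n", idx, toy_next_cached_hash(cache, data, idx));
        return 0;
    }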
