diff --git a/lib/common/compiler.h b/lib/common/compiler.h index a9062d0f35..ea5fe2f472 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -101,6 +101,13 @@ # define TARGET_ATTRIBUTE(target) #endif +/* Target attribute for BMI2 dynamic dispatch. + * Enable lzcnt, bmi, and bmi2. + * We test for bmi1 & bmi2. lzcnt is included in bmi1. + */ +#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2") + + /* Enable runtime BMI2 dispatch based on the CPU. * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. */ diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 3ac00255f3..4229b40c5e 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -223,7 +223,7 @@ static size_t FSE_readNCount_body_default( } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( +BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2( short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) { @@ -343,7 +343,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r } #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, +static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index f4ff58fa0a..a5a358015f 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) { return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); } diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 1dfa120e54..9073df4223 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -20,6 +20,7 @@ * Dependencies ***************************************/ #include "compiler.h" +#include "cpu.h" #include "mem.h" #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" @@ -472,6 +473,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize); +/** + * @returns true iff the CPU supports dynamic BMI2 dispatch. + */ +MEM_STATIC int ZSTD_cpuSupportsBmi2(void) +{ + ZSTD_cpuid_t cpuid = ZSTD_cpuid(); + return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); +} #if defined (__cplusplus) } diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index facec33005..41a414b5ca 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1029,7 +1029,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 32e486cd8c..579c105b24 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -12,7 +12,6 @@ * Dependencies ***************************************/ #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ -#include "../common/cpu.h" #include "../common/mem.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ @@ -100,7 +99,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) assert(cctx != NULL); ZSTD_memset(cctx, 0, sizeof(*cctx)); cctx->customMem = memManager; - cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + cctx->bmi2 = ZSTD_cpuSupportsBmi2(); { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); assert(!ZSTD_isError(err)); (void)err; diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index 44e1728ff5..fa31e6e946 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -399,7 +399,7 @@ ZSTD_encodeSequences_default( #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_encodeSequences_bmi2( void* dst, size_t dstCapacity, FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 9322c99a64..dd6c466c71 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -68,7 +68,7 @@ #endif #if HUF_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 -# define HUF_ASM_X86_64_BMI2_ATTRS TARGET_ATTRIBUTE("bmi2") +# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE #else # define HUF_ASM_X86_64_BMI2_ATTRS #endif @@ -120,7 +120,7 @@ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ } \ \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \ void* dst, size_t dstSize, \ const void* cSrc, size_t cSrcSize, \ const HUF_DTable* DTable) \ @@ -670,7 +670,7 @@ HUF_decompress4X1_usingDTable_internal_body( } #if HUF_NEED_BMI2_FUNCTION -static TARGET_ATTRIBUTE("bmi2") +static BMI2_TARGET_ATTRIBUTE size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); @@ -1386,7 +1386,7 @@ HUF_decompress4X2_usingDTable_internal_body( } #if HUF_NEED_BMI2_FUNCTION -static TARGET_ATTRIBUTE("bmi2") +static BMI2_TARGET_ATTRIBUTE size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 47ca874bf6..0031e98cfb 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -56,7 +56,6 @@ * Dependencies *********************************************************/ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ -#include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" @@ -265,7 +264,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; #if DYNAMIC_BMI2 - dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->bmi2 = ZSTD_cpuSupportsBmi2(); #endif dctx->ddictSet = NULL; ZSTD_DCtx_resetParameters(dctx); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 79a44d9cba..e548844ce0 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -571,7 +571,7 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, +BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) @@ -1846,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #if DYNAMIC_BMI2 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1856,7 +1856,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1869,7 +1869,7 @@ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq,