Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bmi2] Add lzcnt and bmi target attributes #2888

Merged
merged 1 commit into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/common/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@
# define TARGET_ATTRIBUTE(target)
#endif

/* Target attribute for BMI2 dynamic dispatch.
* Enable lzcnt, bmi, and bmi2.
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
*/
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")


/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
Expand Down
4 changes: 2 additions & 2 deletions lib/common/entropy_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ static size_t FSE_readNCount_body_default(
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
Expand Down Expand Up @@ -343,7 +343,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
}

#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
Expand Down
2 changes: 1 addition & 1 deletion lib/common/fse_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
Expand Down
9 changes: 9 additions & 0 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* Dependencies
***************************************/
#include "compiler.h"
#include "cpu.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
Expand Down Expand Up @@ -472,6 +473,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);

/**
* @returns true iff the CPU supports dynamic BMI2 dispatch.
*/
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
{
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}

#if defined (__cplusplus)
}
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/huf_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,

#if DYNAMIC_BMI2

static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
Expand Down
3 changes: 1 addition & 2 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
* Dependencies
***************************************/
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/cpu.h"
#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
Expand Down Expand Up @@ -100,7 +99,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
assert(cctx != NULL);
ZSTD_memset(cctx, 0, sizeof(*cctx));
cctx->customMem = memManager;
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
cctx->bmi2 = ZSTD_cpuSupportsBmi2();
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
assert(!ZSTD_isError(err));
(void)err;
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/zstd_compress_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ ZSTD_encodeSequences_default(

#if DYNAMIC_BMI2

static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
Expand Down
8 changes: 4 additions & 4 deletions lib/decompress/huf_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
#endif

#if HUF_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
# define HUF_ASM_X86_64_BMI2_ATTRS TARGET_ATTRIBUTE("bmi2")
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
#else
# define HUF_ASM_X86_64_BMI2_ATTRS
#endif
Expand Down Expand Up @@ -120,7 +120,7 @@
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
} \
\
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
void* dst, size_t dstSize, \
const void* cSrc, size_t cSrcSize, \
const HUF_DTable* DTable) \
Expand Down Expand Up @@ -670,7 +670,7 @@ HUF_decompress4X1_usingDTable_internal_body(
}

#if HUF_NEED_BMI2_FUNCTION
static TARGET_ATTRIBUTE("bmi2")
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
size_t cSrcSize, HUF_DTable const* DTable) {
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
Expand Down Expand Up @@ -1386,7 +1386,7 @@ HUF_decompress4X2_usingDTable_internal_body(
}

#if HUF_NEED_BMI2_FUNCTION
static TARGET_ATTRIBUTE("bmi2")
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
size_t cSrcSize, HUF_DTable const* DTable) {
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
Expand Down
3 changes: 1 addition & 2 deletions lib/decompress/zstd_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
* Dependencies
*********************************************************/
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
Expand Down Expand Up @@ -265,7 +264,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
dctx->ddictSet = NULL;
ZSTD_DCtx_resetParameters(dctx);
Expand Down
8 changes: 4 additions & 4 deletions lib/decompress/zstd_decompress_block.c
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize)
Expand Down Expand Up @@ -1846,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#if DYNAMIC_BMI2

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
Expand All @@ -1856,7 +1856,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
{
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
Expand All @@ -1869,7 +1869,7 @@ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
Expand Down