From 57a7b299fbb0e6702e649db00da897a5e823bdfe Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 15 Nov 2021 20:56:42 -0800 Subject: [PATCH] lib: zstd: Remove large inline functions from zstd_{double_,}fast.c Backport of upstream PR #2863 [0]. Large functions with excessive force inlining can cause trouble for compilers, and can sometimes take excess stack space because the compiler isn't able to fully analyze the function. This commit splits functions that have multiple copies of the same body into multiple smaller functions, which can help the compiler. This was specifically causing issues on the parisc architecture [1]. In this configuration, especially with UBSAN enabled, these functions stack usage could get quite large. This is because the compiler was doing a poor job handling the extremely large function which had multiple copies of the function body inlined into it. After this commit we see: [0] https://github.com/facebook/zstd/pull/2863 [1] https://lkml.org/lkml/2021/11/15/710 Reported-by: Geert Uytterhoeven Signed-off-by: Nick Terrell --- lib/zstd/compress/zstd_double_fast.c | 61 ++++++++++++++++++------ lib/zstd/compress/zstd_fast.c | 69 ++++++++++++++++++++++------ 2 files changed, 104 insertions(+), 26 deletions(-) diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c index b0424d23ac57f0..fb941a5b70f54e 100644 --- a/lib/zstd/compress/zstd_double_fast.c +++ b/lib/zstd/compress/zstd_double_fast.c @@ -313,6 +313,26 @@ size_t ZSTD_compressBlock_doubleFast_generic( return (size_t)(iend - anchor); } +#define ZSTD_GEN_FN(dictMode, mls) \ + static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_##dictMode); \ + } + +ZSTD_GEN_FN(noDict, 4) +ZSTD_GEN_FN(noDict, 5) +ZSTD_GEN_FN(noDict, 6) +ZSTD_GEN_FN(noDict, 7) + +ZSTD_GEN_FN(dictMatchState, 4) +ZSTD_GEN_FN(dictMatchState, 5) +ZSTD_GEN_FN(dictMatchState, 6) +ZSTD_GEN_FN(dictMatchState, 7) + +#undef ZSTD_GEN_FN + size_t ZSTD_compressBlock_doubleFast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -323,13 +343,13 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); } } @@ -343,13 +363,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); } } @@ -385,7 +405,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -499,6 +519,21 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } +#define ZSTD_GEN_FN(mls) \ + static size_t ZSTD_compressBlock_doubleFast_extDict_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_FN(4) +ZSTD_GEN_FN(5) +ZSTD_GEN_FN(6) +ZSTD_GEN_FN(7) + +#undef ZSTD_GEN_FN + size_t ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -508,12 +543,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); } } diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c index 96b7d48e2868ef..e0652e31d421a5 100644 --- a/lib/zstd/compress/zstd_fast.c +++ b/lib/zstd/compress/zstd_fast.c @@ -182,6 +182,20 @@ ZSTD_compressBlock_fast_generic( return (size_t)(iend - anchor); } +#define ZSTD_GEN_FN(mls) \ + static size_t ZSTD_compressBlock_fast_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_FN(4) +ZSTD_GEN_FN(5) +ZSTD_GEN_FN(6) +ZSTD_GEN_FN(7) + +#undef ZSTD_GEN_FN size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -193,13 +207,13 @@ size_t ZSTD_compressBlock_fast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_7(ms, seqStore, rep, src, srcSize); } } @@ -351,6 +365,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( return (size_t)(iend - anchor); } +#define ZSTD_GEN_FN(mls) \ + static size_t ZSTD_compressBlock_fast_dictMatchState_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_FN(4) +ZSTD_GEN_FN(5) +ZSTD_GEN_FN(6) +ZSTD_GEN_FN(7) + +#undef ZSTD_GEN_FN + size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -361,13 +390,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize); } } @@ -402,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -475,6 +504,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( return (size_t)(iend - anchor); } +#define ZSTD_GEN_FN(mls) \ + static size_t ZSTD_compressBlock_fast_extDict_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_FN(4) +ZSTD_GEN_FN(5) +ZSTD_GEN_FN(6) +ZSTD_GEN_FN(7) + +#undef ZSTD_GEN_FN size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -485,12 +528,12 @@ size_t ZSTD_compressBlock_fast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize); } }