From 012e03f0dfb1a06e8f7defdaa9d0e3a5ab5aa261 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 12 Jul 2023 00:55:17 +0200 Subject: [PATCH 01/10] Revert to the previous version and address Jakob's feedback --- src/coreclr/jit/compiler.cpp | 9 + src/coreclr/jit/compiler.h | 57 ++- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/helperexpansion.cpp | 349 +++++++++++++++++- src/coreclr/jit/importercalls.cpp | 12 + src/coreclr/jit/morph.cpp | 10 + src/coreclr/jit/namedintrinsiclist.h | 3 + src/coreclr/jit/valuenum.cpp | 8 +- .../src/System/Text/UTF8Encoding.Sealed.cs | 35 +- .../JIT/opt/Vectorization/GetUtf8Bytes.cs | 244 ++++++++++++ .../JIT/opt/Vectorization/GetUtf8Bytes.csproj | 11 + 11 files changed, 724 insertions(+), 15 deletions(-) create mode 100644 src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs create mode 100644 src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9129d5d4c3b27..15623f46b226d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4877,6 +4877,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl bool doBranchOpt = true; bool doCse = true; bool doAssertionProp = true; + bool doVNBasedIntrinExpansion = true; bool doRangeAnalysis = true; bool doVNBasedDeadStoreRemoval = true; int iterations = 1; @@ -4890,6 +4891,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0); doCse = doValueNum; doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0); + doVNBasedIntrinExpansion = doValueNum; doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0); doVNBasedDeadStoreRemoval = doValueNum && (JitConfig.JitDoVNBasedDeadStoreRemoval() != 0); @@ -4973,6 +4975,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_ASSERTION_PROP_MAIN, &Compiler::optAssertionPropMain); } + if (doVNBasedIntrinExpansion) + { + // Expand some intrinsics based on VN data + // + DoPhase(this, PHASE_VN_BASED_INTRINSIC_EXPAND, &Compiler::fgVNBasedIntrinsicExpansion); + } + if (doRangeAnalysis) { // Bounds check elimination via range analysis diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8a2d862d1fe2c..1575bb2bf753e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5203,6 +5203,8 @@ class Compiler } } + bool GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj); + // Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types var_types getJitGCType(BYTE gcType); @@ -5332,10 +5334,10 @@ class Compiler void SplitTreesRemoveCommas(); template - PhaseStatus fgExpandHelper(bool skipRarelyRunBlocks); + PhaseStatus fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrinsics = false); template - bool fgExpandHelperForBlock(BasicBlock** pBlock); + bool fgExpandHelperForBlock(BasicBlock** pBlock, bool handleIntrinsics = false); PhaseStatus fgExpandRuntimeLookups(); bool fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); @@ -5346,6 +5348,10 @@ class Compiler PhaseStatus fgExpandStaticInit(); bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + PhaseStatus fgVNBasedIntrinsicExpansion(); + bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + bool fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + PhaseStatus fgInsertGCPolls(); BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block); @@ -7074,6 +7080,7 @@ class Compiler #define OMF_HAS_MDARRAYREF 0x00004000 // Method contains multi-dimensional intrinsic array element loads or stores. #define OMF_HAS_STATIC_INIT 0x00008000 // Method has static initializations we might want to partially inline #define OMF_HAS_TLS_FIELD 0x00010000 // Method contains TLS field access +#define OMF_HAS_SPECIAL_INTRINSICS 0x00020000 // Method contains special intrinsics expanded in late phases // clang-format on @@ -7134,6 +7141,16 @@ class Compiler optMethodFlags |= OMF_HAS_TLS_FIELD; } + bool doesMethodHaveSpecialIntrinsics() + { + return (optMethodFlags & OMF_HAS_SPECIAL_INTRINSICS) != 0; + } + + void setMethodHasSpecialIntrinsics() + { + optMethodFlags |= OMF_HAS_SPECIAL_INTRINSICS; + } + void pickGDV(GenTreeCall* call, IL_OFFSET ilOffset, bool isInterface, @@ -8947,6 +8964,42 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return size; // 2, 1, 0 } + var_types roundDownMaxRegSize(unsigned size) + { + assert(size > 0); + ssize_t maxRegSize = TARGET_POINTER_SIZE; +#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) + if (IsBaselineSimdIsaSupported()) + { + maxRegSize = max(maxRegSize, (ssize_t)roundDownSIMDSize((unsigned)size)); + } +#endif + int nearestPow2 = 1 << BitOperations::Log2((unsigned)size); + switch (min((int)maxRegSize, nearestPow2)) + { + case 1: + return TYP_UBYTE; + case 2: + return TYP_USHORT; + case 4: + return TYP_INT; + case 8: + return TYP_LONG; +#ifdef FEATURE_SIMD + case 16: + return TYP_SIMD16; +#ifdef TARGET_XARCH + case 32: + return TYP_SIMD32; + case 64: + return TYP_SIMD64; +#endif +#endif + default: + unreached(); + } + } + enum UnrollKind { Memset, diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 460b47f9bca85..0aa63000e6ae0 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -85,6 +85,7 @@ CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", false, -1, false) CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", false, -1, false) +CompPhaseNameMacro(PHASE_VN_BASED_INTRINSIC_EXPAND, "VN based intrinsic expansion", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BRANCHES, "Redundant branch opts", false, -1, false) CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", false, -1, false) CompPhaseNameMacro(PHASE_IF_CONVERSION, "If conversion", false, -1, false) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b95f75f075a78..7e6b3a8b90890 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -811,7 +811,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // true if there was any helper that was expanded. // template -PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) +PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrinsics) { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) @@ -824,7 +824,7 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) // Expand and visit the last block again to find more candidates INDEBUG(BasicBlock* origBlock = block); - while (fgExpandHelperForBlock(&block)) + while (fgExpandHelperForBlock(&block, handleIntrinsics)) { result = PhaseStatus::MODIFIED_EVERYTHING; #ifdef DEBUG @@ -856,7 +856,7 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) // true if a helper was expanded // template -bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock) +bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock, bool handleIntrinsics) { for (Statement* const stmt : (*pBlock)->NonPhiStatements()) { @@ -868,7 +868,14 @@ bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock) for (GenTree* const tree : stmt->TreeList()) { - if (!tree->IsHelperCall()) + if (handleIntrinsics) + { + if (!tree->IsCall() || ((tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)) + { + continue; + } + } + else if (!tree->IsHelperCall()) { continue; } @@ -1171,3 +1178,337 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G call->gtInitClsHnd = NO_CLASS_HANDLE; return true; } + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansion: Expand specific calls marked as intrinsics using VN. +// +// Returns: +// PhaseStatus indicating what, if anything, was changed. +// +PhaseStatus Compiler::fgVNBasedIntrinsicExpansion() +{ + PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; + + if (!doesMethodHaveSpecialIntrinsics() || opts.OptimizationDisabled()) + { + return result; + } + + // TODO: Replace with opts.compCodeOpt once it's fixed + const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); + if (preferSize) + { + // The optimization comes with a codegen size increase + JITDUMP("Optimized for size - bail out.\n") + return result; + } + return fgExpandHelper<&Compiler::fgVNBasedIntrinsicExpansionForCall>(true, true); +} + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansionForCall : Expand specific calls marked as intrinsics using VN. +// +// Arguments: +// block - Block containing the intrinsic call to expand +// stmt - Statement containing the call +// call - The intrinsic call +// +// Returns: +// True if expanded, false otherwise. +// +bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + assert(call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC); + NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd); + if (ni == NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) + { + return fgVNBasedIntrinsicExpansionForCall_ReadUtf8(pBlock, stmt, call); + } + + // TODO: Expand IsKnownConstant here + // Also, move various unrollings here + + return false; +} + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansionForCall_ReadUtf8 : Expand NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8 +// when src data is a string literal (UTF16) tha can be narrowed to ASCII (UTF8), e.g.: +// +// string str = "Hello, world!"; +// int bytesWritten = ReadUtf8(ref str[0], str.Length, buffer, buffer.Length); +// +// becomes: +// +// bytesWritten = 0; // default value +// if (buffer.Length >= str.Length) // *might* be folded if buffer.Length is a constant +// { +// memcpy(buffer, "Hello, world!"u8, str.Length); // note the u8 suffix +// bytesWritten = str.Length; +// } +// +// Arguments: +// block - Block containing the intrinsic call to expand +// stmt - Statement containing the call +// call - The intrinsic call +// +// Returns: +// True if expanded, false otherwise. +// +bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + BasicBlock* block = *pBlock; + + // NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8 + assert(call->gtArgs.CountUserArgs() == 4); + + GenTree* srcPtr = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + + // We're interested in a case when srcPtr is a string literal and srcLen is a constant + // srcLen doesn't have to match the srcPtr's length, but it should not exceed it. + ssize_t strObjOffset = 0; + CORINFO_OBJECT_HANDLE strObj = nullptr; + if (!GetObjectHandleAndOffset(srcPtr, &strObjOffset, &strObj) || (strObjOffset > INT_MAX)) + { + // We might want to support more cases here, e.g. ROS RVA data. + // Also, we check that strObjOffset (which is in most cases is expected to be just + // OFFSETOF__CORINFO_String__chars) doesn't exceed INT_MAX since we'll need to cast + // it to int for getObjectContent API. + JITDUMP("ReadUtf8: srcPtr is not an object handle\n") + return false; + } + + // We mostly expect string literal objects here, but let's be more agile just in case + if (!info.compCompHnd->isObjectImmutable(strObj)) + { + JITDUMP("ReadUtf8: srcPtr is not immutable (not a frozen string object?)\n") + return false; + } + + GenTree* srcLen = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + if (!srcLen->gtVNPair.BothEqual() || !vnStore->IsVNInt32Constant(srcLen->gtVNPair.GetLiberal())) + { + JITDUMP("ReadUtf8: srcLen is not constant\n") + return false; + } + + const int MaxPossibleUnrollThreshold = 256; + const unsigned unrollThreshold = min(getUnrollThreshold(UnrollKind::Memcpy), MaxPossibleUnrollThreshold); + const unsigned srcLenCns = (unsigned)vnStore->GetConstantInt32(srcLen->gtVNPair.GetLiberal()); + if (srcLenCns == 0 || srcLenCns > unrollThreshold) + { + // TODO: handle srcLenCns == 0 if it's a common case + JITDUMP("ReadUtf8: srcLenCns is out of unrollable range\n") + return false; + } + + // Read the string literal (UTF16) into a local buffer (UTF8) + assert(strObj != nullptr); + BYTE srcUtf8cns[MaxPossibleUnrollThreshold * 2] = {}; + + // Both must be within [0..INT_MAX] range as we're going to cast them to int + assert((unsigned)srcLenCns <= INT_MAX); + assert((unsigned)strObjOffset <= INT_MAX); + + // getObjectContent is expected to validate the offset and length + if (!info.compCompHnd->getObjectContent(strObj, srcUtf8cns, (int)srcLenCns, (int)strObjOffset)) + { + JITDUMP("ReadUtf8: getObjectContent returned false.\n") + return false; + } + + for (unsigned charIndex = 0; charIndex < srcLenCns; charIndex++) + { + uint16_t ch = ((uint16_t*)srcUtf8cns)[charIndex]; + if (ch > 127) + { + // Only ASCII is supported. + JITDUMP("ReadUtf8: %dth char is not ASCII.\n", charIndex) + return false; + } + + // Narrow U16 to U8 in the same buffer (it's ok since we only deal with ASCII) + srcUtf8cns[charIndex] = (BYTE)ch; + } + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + // Split block right before the call tree (this is a standard pattern we use in helperexpansion.cpp) + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + *pBlock = block; + + // If we suddenly need to use these arguments, we'll have to reload them from the call + // after the split, so let's null them to prevent accidental use. + srcLen = nullptr; + srcPtr = nullptr; + + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + // We don't need this flag anymore. + call->gtCallMoreFlags &= ~GTF_CALL_M_SPECIAL_INTRINSIC; + + // Grab a temp to store the result. + // The result corresponds the number of bytes written to dstPtr (int32). + assert(call->TypeIs(TYP_INT)); + const unsigned resultLclNum = lvaGrabTemp(true DEBUGARG("local for result")); + lvaTable[resultLclNum].lvType = TYP_INT; + *callUse = gtNewLclvNode(resultLclNum, TYP_INT); + fgMorphStmtBlockOps(block, stmt); + gtUpdateStmtSideEffects(stmt); + + // srcLenCns is the length of the string literal in chars (UTF16) + // but we're going to use the same value as the "bytesWritten" result in the fast path and in the length check. + GenTree* srcLenCnsNode = gtNewIconNode(srcLenCns); + fgUpdateConstTreeValueNumber(srcLenCnsNode); + + // We're going to insert the following blocks: + // + // prevBb: + // + // lengthCheckBb: + // bytesWritten = -1; + // if (dstLen + // bytesWritten = srcLenCns * 2; + // + // block: + // use(bytesWritten) + // + + // + // Block 1: lengthCheckBb (we check that dstLen < srcLen) + // In case if destIsKnownToFit is true we'll use this block to keep side-effects of the original arguments. + // and it will be a fall-through block. + // + BasicBlock* lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true); + lengthCheckBb->bbFlags |= BBF_INTERNAL; + + GenTree* dstPtr = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + GenTree* dstLen = call->gtArgs.GetUserArgByIndex(3)->GetNode(); + + // Set bytesWritten -1 by default, if the fast path is not taken we'll return it as the result. + GenTree* bytesWrittenDefaultVal = gtNewStoreLclVarNode(resultLclNum, gtNewIconNode(-1)); + fgInsertStmtAtEnd(lengthCheckBb, fgNewStmtFromTree(bytesWrittenDefaultVal, debugInfo)); + + GenTree* lengthCheck = gtNewOperNode(GT_LT, TYP_INT, gtClone(dstLen), srcLenCnsNode); + lengthCheck->gtFlags |= GTF_RELOP_JMP_USED; + Statement* lengthCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, lengthCheck), debugInfo); + fgInsertStmtAtEnd(lengthCheckBb, lengthCheckStmt); + lengthCheckBb->bbCodeOffs = block->bbCodeOffsEnd; + lengthCheckBb->bbCodeOffsEnd = block->bbCodeOffsEnd; + + // + // Block 2: fastpathBb - unrolled loop that copies the UTF8 const data to the destination + // + // We're going to emit a series of loads and stores to copy the data. + // In theory, we could just emit the const U8 data to the data section and use GT_BLK here + // but that would be a bit less efficient since we would have to load the data from memory. + // + BasicBlock* fastpathBb = fgNewBBafter(BBJ_NONE, lengthCheckBb, true); + fastpathBb->bbFlags |= BBF_INTERNAL; + + // The widest type we can use for loads + const var_types maxLoadType = roundDownMaxRegSize(srcLenCns); + + // How many iterations we need to copy UTF8 const data to the destination + unsigned iterations = srcLenCns / genTypeSize(maxLoadType); + + // Add one more iteration if we have a remainder + iterations += srcLenCns % genTypeSize(maxLoadType) == 0 ? 0 : 1; + + for (unsigned i = 0; i < iterations; i++) + { + ssize_t offest = (ssize_t)i * genTypeSize(maxLoadType); + + // Last iteration: overlap with previous load if needed + if (i == iterations - 1) + { + offest = (ssize_t)srcLenCns - genTypeSize(maxLoadType); + } + + // We're going to emit the following tree: + + // -A-XG------ * ASG %maxLoadType% (copy) + // D--XG--N--- +--* IND %maxLoadType% + // -------N--- | \--* ADD byref + // ----------- | +--* LCL_VAR byref dstPtr + // ----------- | \--* CNS_INT int %offset% + // ----------- \--* CNS_VEC or CNS_INT representing UTF8 const data chunk + + GenTreeIntCon* offsetNode = gtNewIconNode(offest); + fgUpdateConstTreeValueNumber(offsetNode); + + // Grab a chunk from srcUtf8cnsData for the given offset and width + GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, srcUtf8cns + offest); + fgUpdateConstTreeValueNumber(utf8cnsChunkNode); + + GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(dstPtr), offsetNode); + GenTreeOp* storeInd = gtNewStoreIndNode(maxLoadType, dstAddOffsetNode, utf8cnsChunkNode); + Statement* storeIndStmt = fgNewStmtFromTree(storeInd, debugInfo); + fgInsertStmtAtEnd(fastpathBb, storeIndStmt); + gtUpdateStmtSideEffects(storeIndStmt); + } + + // Finally, store the number of bytes written to the resultLcl local + Statement* finalStmt = fgNewStmtFromTree(gtNewStoreLclVarNode(resultLclNum, gtCloneExpr(srcLenCnsNode)), debugInfo); + fgInsertStmtAtEnd(fastpathBb, finalStmt); + fastpathBb->bbCodeOffs = block->bbCodeOffsEnd; + fastpathBb->bbCodeOffsEnd = block->bbCodeOffsEnd; + + // + // Update preds in all new blocks + // + // block is no longer a predecessor of prevBb + fgRemoveRefPred(block, prevBb); + // prevBb flows into lengthCheckBb + fgAddRefPred(lengthCheckBb, prevBb); + // lengthCheckBb has two successors: block and fastpathBb (if !destIsKnownToFit) + fgAddRefPred(fastpathBb, lengthCheckBb); + fgAddRefPred(block, lengthCheckBb); + // fastpathBb flows into block + fgAddRefPred(block, fastpathBb); + // lengthCheckBb jumps to block if condition is met + lengthCheckBb->bbJumpDest = block; + + // + // Re-distribute weights + // + lengthCheckBb->inheritWeight(prevBb); + // we don't have any real world data on how often this fallback path is taken so we just assume 20% of the time + fastpathBb->inheritWeightPercentage(lengthCheckBb, 80); + block->inheritWeight(prevBb); + + // + // Update bbNatLoopNum for all new blocks + // + lengthCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fastpathBb->bbNatLoopNum = prevBb->bbNatLoopNum; + + // All blocks are expected to be in the same EH region + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, lengthCheckBb)); + assert(BasicBlock::sameEHRegion(prevBb, fastpathBb)); + + // Extra step: merge prevBb with lengthCheckBb if possible + if (fgCanCompactBlocks(prevBb, lengthCheckBb)) + { + fgCompactBlocks(prevBb, lengthCheckBb); + } + + JITDUMP("ReadUtf8: succesfully expanded!\n") + return true; +} diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 7a304b11469ed..8918f783d8066 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3749,6 +3749,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: case NI_System_Buffer_Memmove: { @@ -9166,6 +9167,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(namespaceName, "Text") == 0) + { + if (strcmp(className, "UTF8EncodingSealed") == 0) + { + if (strcmp(methodName, "ReadUtf8") == 0) + { + assert(strcmp(enclosingClassName, "UTF8Encoding") == 0); + result = NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8; + } + } + } else if (strcmp(namespaceName, "Threading") == 0) { if (strcmp(className, "Interlocked") == 0) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d2a286508408e..2987bf4b3ea45 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7680,6 +7680,16 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) #endif } + if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) + { + if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) == + NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) + { + // Expanded in fgVNBasedIntrinsicExpansion + setMethodHasSpecialIntrinsics(); + } + } + if (((call->gtCallMoreFlags & (GTF_CALL_M_SPECIAL_INTRINSIC | GTF_CALL_M_LDVIRTFTN_INTERFACE)) == 0) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 86c7d445dabba..0973a45571f49 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -66,6 +66,9 @@ enum NamedIntrinsic : unsigned short NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness, NI_System_GC_KeepAlive, + + NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8, + NI_System_Threading_Thread_get_CurrentThread, NI_System_Threading_Thread_get_ManagedThreadId, NI_System_Threading_Volatile_Read, diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0cb86cbd04e75..ccf8588c16b41 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10665,7 +10665,6 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s // the given tree. // // Arguments: -// vnStore - ValueNumStore object // tree - tree node to inspect // byteOffset - [Out] resulting byte offset // pObj - [Out] constant object handle @@ -10673,10 +10672,7 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s // Return Value: // true if the given tree is a ObjHandle + CNS // -static bool GetObjectHandleAndOffset(ValueNumStore* vnStore, - GenTree* tree, - ssize_t* byteOffset, - CORINFO_OBJECT_HANDLE* pObj) +bool Compiler::GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj) { if (!tree->gtVNPair.BothEqual()) { @@ -10763,7 +10759,7 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree) } } else if (!tree->TypeIs(TYP_REF, TYP_BYREF, TYP_STRUCT) && - GetObjectHandleAndOffset(vnStore, tree->gtGetOp1(), &byteOffset, &obj)) + GetObjectHandleAndOffset(tree->gtGetOp1(), &byteOffset, &obj)) { // See if we can fold IND(ADD(FrozenObj, CNS)) to a constant assert(obj != nullptr); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index dc5f36deca8de..2db53af377ac8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace System.Text { @@ -147,11 +149,38 @@ private unsafe string GetStringForSmallInput(byte[] bytes) return new string(new ReadOnlySpan(ref *pDestination, charsWritten)); // this overload of ROS ctor doesn't validate length } - // TODO: Make this [Intrinsic] and handle JIT-time UTF8 encoding of literal `chars`. /// - public override unsafe bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public override bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) { - return base.TryGetBytes(chars, bytes, out bytesWritten); + int written = ReadUtf8( + ref MemoryMarshal.GetReference(chars), chars.Length, + ref MemoryMarshal.GetReference(bytes), bytes.Length); + + if (written >= 0) + { + bytesWritten = written; + return true; + } + + bytesWritten = 0; + return false; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + [Intrinsic] // Can be unrolled by JIT if input points to a constant string (with constant inputLength). + internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) + { + if (inputLength == 0 || outputLength == 0) + { + return 0; + } + + fixed (char* pInput = &input) + fixed (byte* pOutput = &output) + { + return ((UTF8EncodingSealed)UTF8).GetBytesCommon(pInput, inputLength, pOutput, outputLength); + } } } } diff --git a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs b/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs new file mode 100644 index 0000000000000..f44ae831687c7 --- /dev/null +++ b/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs @@ -0,0 +1,244 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading; + +public class GetUtf8Bytes +{ + static int Main(string[] args) + { + // Warm up for PGO + for (int i=0; i<200; i++) + { + Test_empty(); + Test_hello(); + Test_CJK(); + Test_SIMD(); + Thread.Sleep(10); + } + + return 100; + } + + static void Test_empty() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 6); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 0); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("", buffer, out bytesWritten); + } + + static void Test_hello() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 6); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(5, bytesWritten); + AssertIsTrue(span.SequenceEqual("hello\0"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 5); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(5, bytesWritten); + AssertIsTrue(span.SequenceEqual("hello"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 1); + AssertIsTrue(!TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 0); + AssertIsTrue(!TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(span.SequenceEqual(""u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("hello", buffer, out bytesWritten); + } + + static void Test_CJK() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 3); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(3, bytesWritten); + AssertIsTrue(span.SequenceEqual(new byte[] { 0xE9, 0x89, 0x84 })); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("\u9244", buffer, out bytesWritten); + } + + static void Test_SIMD() + { + byte[] bytes = new byte[1024]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 15); + AssertIsTrue(TryGetBytes_15(span, out bytesWritten)); + AssertEquals(15, bytesWritten); + AssertIsTrue(span.SequenceEqual("000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 16); + AssertIsTrue(TryGetBytes_16(span, out bytesWritten)); + AssertEquals(16, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 31); + AssertIsTrue(TryGetBytes_31(span, out bytesWritten)); + AssertEquals(31, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 32); + AssertIsTrue(TryGetBytes_32(span, out bytesWritten)); + AssertEquals(32, bytesWritten); + AssertIsTrue(span.SequenceEqual("00001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 64); + AssertIsTrue(TryGetBytes_64(span, out bytesWritten)); + AssertEquals(64, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333300001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 128); + AssertIsTrue(TryGetBytes_128(span, out bytesWritten)); + AssertEquals(128, bytesWritten); + AssertIsTrue(span.SequenceEqual("00001111222233330000111122223333000011112222333300001111222233330000111122223333000011112222333300001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 31); + AssertIsTrue(TryGetBytes_31(span, out bytesWritten)); + AssertEquals(31, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + + span = bytes.AsSpan(); + AssertIsTrue(!TryGetBytes_16(span.Slice(0, 15), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_31(span.Slice(0, 30), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_32(span.Slice(0, 31), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_64(span.Slice(0, 63), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_128(span.Slice(0, 127), out bytesWritten)); + AssertEquals(0, bytesWritten); + + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_15(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("000011112222333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_16(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_31(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333000011112222333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_32(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("00001111222233330000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_64(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333000011112222333300001111222233330000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_128(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("00001111222233330000111122223333000011112222333300001111222233330000111122223333000011112222333300001111222233330000111122223333", buffer, out bytesWritten); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void AssertIsTrue(bool value) + { + if (!value) + throw new Exception(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void AssertEquals(int actual, int expected) + { + if (expected != actual) + throw new Exception($"{actual} != {expected}"); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void IsEmpty(Span span) + { + foreach (byte item in span) + { + if (item != 0) + throw new Exception($"{item} != 0"); + } + } +} diff --git a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj b/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj new file mode 100644 index 0000000000000..e92b43a7dd71f --- /dev/null +++ b/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj @@ -0,0 +1,11 @@ + + + Exe + True + + + + + + + From dc8a128ad905e2b4a4654128e43557ef021420e9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 12 Jul 2023 10:37:23 +0200 Subject: [PATCH 02/10] Clean up --- src/coreclr/jit/compiler.h | 27 ++++------ src/coreclr/jit/helperexpansion.cpp | 78 +++++++++++++---------------- 2 files changed, 45 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1575bb2bf753e..3bf735c290e83 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8951,7 +8951,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX public: // Similar to roundUpSIMDSize, but for General Purpose Registers (GPR) - unsigned int roundUpGPRSize(unsigned size) + unsigned roundUpGPRSize(unsigned size) { if (size > 4 && (REGSIZE_BYTES == 8)) { @@ -8964,37 +8964,28 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return size; // 2, 1, 0 } - var_types roundDownMaxRegSize(unsigned size) + var_types roundDownMaxType(unsigned size) { assert(size > 0); - ssize_t maxRegSize = TARGET_POINTER_SIZE; -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) - if (IsBaselineSimdIsaSupported()) + var_types result = TYP_UNDEF; +#ifdef FEATURE_SIMD + if (IsBaselineSimdIsaSupported() && (roundDownSIMDSize(size) > 0)) { - maxRegSize = max(maxRegSize, (ssize_t)roundDownSIMDSize((unsigned)size)); + return getSIMDTypeForSize(roundDownSIMDSize(size)); } #endif int nearestPow2 = 1 << BitOperations::Log2((unsigned)size); - switch (min((int)maxRegSize, nearestPow2)) + switch (min(nearestPow2, REGSIZE_BYTES)) { case 1: - return TYP_UBYTE; + return TYP_BYTE; case 2: return TYP_USHORT; case 4: return TYP_INT; case 8: + assert(REGSIZE_BYTES == 8); return TYP_LONG; -#ifdef FEATURE_SIMD - case 16: - return TYP_SIMD16; -#ifdef TARGET_XARCH - case 32: - return TYP_SIMD32; - case 64: - return TYP_SIMD64; -#endif -#endif default: unreached(); } diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7e6b3a8b90890..6b08b7071e5e9 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1233,7 +1233,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement //------------------------------------------------------------------------------ // fgVNBasedIntrinsicExpansionForCall_ReadUtf8 : Expand NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8 -// when src data is a string literal (UTF16) tha can be narrowed to ASCII (UTF8), e.g.: +// when src data is a string literal (UTF16) that can be narrowed to ASCII (UTF8), e.g.: // // string str = "Hello, world!"; // int bytesWritten = ReadUtf8(ref str[0], str.Length, buffer, buffer.Length); @@ -1241,10 +1241,10 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement // becomes: // // bytesWritten = 0; // default value -// if (buffer.Length >= str.Length) // *might* be folded if buffer.Length is a constant +// if (buffer.Length >= 13) // { -// memcpy(buffer, "Hello, world!"u8, str.Length); // note the u8 suffix -// bytesWritten = str.Length; +// memcpy(buffer, "Hello, world!"u8, 13); // note the u8 suffix +// bytesWritten = 13; // } // // Arguments: @@ -1259,7 +1259,6 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, { BasicBlock* block = *pBlock; - // NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8 assert(call->gtArgs.CountUserArgs() == 4); GenTree* srcPtr = call->gtArgs.GetUserArgByIndex(0)->GetNode(); @@ -1268,7 +1267,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // srcLen doesn't have to match the srcPtr's length, but it should not exceed it. ssize_t strObjOffset = 0; CORINFO_OBJECT_HANDLE strObj = nullptr; - if (!GetObjectHandleAndOffset(srcPtr, &strObjOffset, &strObj) || (strObjOffset > INT_MAX)) + if (!GetObjectHandleAndOffset(srcPtr, &strObjOffset, &strObj) || ((size_t)strObjOffset > INT_MAX)) { // We might want to support more cases here, e.g. ROS RVA data. // Also, we check that strObjOffset (which is in most cases is expected to be just @@ -1295,7 +1294,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, const int MaxPossibleUnrollThreshold = 256; const unsigned unrollThreshold = min(getUnrollThreshold(UnrollKind::Memcpy), MaxPossibleUnrollThreshold); const unsigned srcLenCns = (unsigned)vnStore->GetConstantInt32(srcLen->gtVNPair.GetLiberal()); - if (srcLenCns == 0 || srcLenCns > unrollThreshold) + if ((srcLenCns == 0) || (srcLenCns > unrollThreshold)) { // TODO: handle srcLenCns == 0 if it's a common case JITDUMP("ReadUtf8: srcLenCns is out of unrollable range\n") @@ -1304,14 +1303,14 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // Read the string literal (UTF16) into a local buffer (UTF8) assert(strObj != nullptr); - BYTE srcUtf8cns[MaxPossibleUnrollThreshold * 2] = {}; + BYTE buffer[MaxPossibleUnrollThreshold * 2]; // Both must be within [0..INT_MAX] range as we're going to cast them to int assert((unsigned)srcLenCns <= INT_MAX); assert((unsigned)strObjOffset <= INT_MAX); // getObjectContent is expected to validate the offset and length - if (!info.compCompHnd->getObjectContent(strObj, srcUtf8cns, (int)srcLenCns, (int)strObjOffset)) + if (!info.compCompHnd->getObjectContent(strObj, buffer, (int)srcLenCns * 2, (int)strObjOffset)) { JITDUMP("ReadUtf8: getObjectContent returned false.\n") return false; @@ -1319,7 +1318,8 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, for (unsigned charIndex = 0; charIndex < srcLenCns; charIndex++) { - uint16_t ch = ((uint16_t*)srcUtf8cns)[charIndex]; + // Buffer keeps the original utf16 chars + uint16_t ch = ((uint16_t*)buffer)[charIndex]; if (ch > 127) { // Only ASCII is supported. @@ -1327,8 +1327,8 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, return false; } - // Narrow U16 to U8 in the same buffer (it's ok since we only deal with ASCII) - srcUtf8cns[charIndex] = (BYTE)ch; + // Narrow U16 to U8 in the same buffer + buffer[charIndex] = (BYTE)ch; } DebugInfo debugInfo = stmt->GetDebugInfo(); @@ -1370,7 +1370,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // srcLenCns is the length of the string literal in chars (UTF16) // but we're going to use the same value as the "bytesWritten" result in the fast path and in the length check. GenTree* srcLenCnsNode = gtNewIconNode(srcLenCns); - fgUpdateConstTreeValueNumber(srcLenCnsNode); + fgValueNumberTreeConst(srcLenCnsNode); // We're going to insert the following blocks: // @@ -1391,20 +1391,16 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // // Block 1: lengthCheckBb (we check that dstLen < srcLen) - // In case if destIsKnownToFit is true we'll use this block to keep side-effects of the original arguments. - // and it will be a fall-through block. // BasicBlock* lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true); lengthCheckBb->bbFlags |= BBF_INTERNAL; - GenTree* dstPtr = call->gtArgs.GetUserArgByIndex(2)->GetNode(); - GenTree* dstLen = call->gtArgs.GetUserArgByIndex(3)->GetNode(); - // Set bytesWritten -1 by default, if the fast path is not taken we'll return it as the result. GenTree* bytesWrittenDefaultVal = gtNewStoreLclVarNode(resultLclNum, gtNewIconNode(-1)); fgInsertStmtAtEnd(lengthCheckBb, fgNewStmtFromTree(bytesWrittenDefaultVal, debugInfo)); - GenTree* lengthCheck = gtNewOperNode(GT_LT, TYP_INT, gtClone(dstLen), srcLenCnsNode); + GenTree* dstLen = call->gtArgs.GetUserArgByIndex(3)->GetNode(); + GenTree* lengthCheck = gtNewOperNode(GT_LT, TYP_INT, gtCloneExpr(dstLen), srcLenCnsNode); lengthCheck->gtFlags |= GTF_RELOP_JMP_USED; Statement* lengthCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, lengthCheck), debugInfo); fgInsertStmtAtEnd(lengthCheckBb, lengthCheckStmt); @@ -1422,45 +1418,44 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, fastpathBb->bbFlags |= BBF_INTERNAL; // The widest type we can use for loads - const var_types maxLoadType = roundDownMaxRegSize(srcLenCns); + const var_types maxLoadType = roundDownMaxType(srcLenCns); + assert(genTypeSize(maxLoadType) > 0); // How many iterations we need to copy UTF8 const data to the destination unsigned iterations = srcLenCns / genTypeSize(maxLoadType); // Add one more iteration if we have a remainder - iterations += srcLenCns % genTypeSize(maxLoadType) == 0 ? 0 : 1; + iterations += (srcLenCns % genTypeSize(maxLoadType) == 0) ? 0 : 1; + GenTree* dstPtr = call->gtArgs.GetUserArgByIndex(2)->GetNode(); for (unsigned i = 0; i < iterations; i++) { - ssize_t offest = (ssize_t)i * genTypeSize(maxLoadType); + ssize_t offset = (ssize_t)i * genTypeSize(maxLoadType); // Last iteration: overlap with previous load if needed if (i == iterations - 1) { - offest = (ssize_t)srcLenCns - genTypeSize(maxLoadType); + offset = (ssize_t)srcLenCns - genTypeSize(maxLoadType); } - // We're going to emit the following tree: - - // -A-XG------ * ASG %maxLoadType% (copy) - // D--XG--N--- +--* IND %maxLoadType% - // -------N--- | \--* ADD byref - // ----------- | +--* LCL_VAR byref dstPtr - // ----------- | \--* CNS_INT int %offset% - // ----------- \--* CNS_VEC or CNS_INT representing UTF8 const data chunk + // We're going to emit the following tree (in case of SIMD16 load): + // + // -A-XG------ * STOREIND simd16 (copy) + // -------N--- +--* ADD byref + // ----------- | +--* LCL_VAR byref + // ----------- | \--* CNS_INT int + // ----------- \--* CNS_VEC simd16 - GenTreeIntCon* offsetNode = gtNewIconNode(offest); - fgUpdateConstTreeValueNumber(offsetNode); + GenTreeIntCon* offsetNode = gtNewIconNode(offset); + fgValueNumberTreeConst(offsetNode); // Grab a chunk from srcUtf8cnsData for the given offset and width - GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, srcUtf8cns + offest); - fgUpdateConstTreeValueNumber(utf8cnsChunkNode); + GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, buffer + offset); + fgValueNumberTreeConst(utf8cnsChunkNode); - GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(dstPtr), offsetNode); + GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, TYP_BYREF, gtCloneExpr(dstPtr), offsetNode); GenTreeOp* storeInd = gtNewStoreIndNode(maxLoadType, dstAddOffsetNode, utf8cnsChunkNode); - Statement* storeIndStmt = fgNewStmtFromTree(storeInd, debugInfo); - fgInsertStmtAtEnd(fastpathBb, storeIndStmt); - gtUpdateStmtSideEffects(storeIndStmt); + fgInsertStmtAtEnd(fastpathBb, fgNewStmtFromTree(storeInd, debugInfo)); } // Finally, store the number of bytes written to the resultLcl local @@ -1476,7 +1471,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, fgRemoveRefPred(block, prevBb); // prevBb flows into lengthCheckBb fgAddRefPred(lengthCheckBb, prevBb); - // lengthCheckBb has two successors: block and fastpathBb (if !destIsKnownToFit) + // lengthCheckBb has two successors: block and fastpathBb fgAddRefPred(fastpathBb, lengthCheckBb); fgAddRefPred(block, lengthCheckBb); // fastpathBb flows into block @@ -1488,8 +1483,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // Re-distribute weights // lengthCheckBb->inheritWeight(prevBb); - // we don't have any real world data on how often this fallback path is taken so we just assume 20% of the time - fastpathBb->inheritWeightPercentage(lengthCheckBb, 80); + fastpathBb->inheritWeight(lengthCheckBb); block->inheritWeight(prevBb); // From 89d5c1d0aa0e30704f63d71b631af5363b3a6247 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 12 Jul 2023 10:39:33 +0200 Subject: [PATCH 03/10] Fix test --- .../JIT/opt/Vectorization/{GetUtf8Bytes.cs => ReadUtf8.cs} | 7 ++++--- .../Vectorization/{GetUtf8Bytes.csproj => ReadUtf8.csproj} | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) rename src/tests/JIT/opt/Vectorization/{GetUtf8Bytes.cs => ReadUtf8.cs} (99%) rename src/tests/JIT/opt/Vectorization/{GetUtf8Bytes.csproj => ReadUtf8.csproj} (91%) diff --git a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs b/src/tests/JIT/opt/Vectorization/ReadUtf8.cs similarity index 99% rename from src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs rename to src/tests/JIT/opt/Vectorization/ReadUtf8.cs index f44ae831687c7..957f6b5b13c7d 100644 --- a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.cs +++ b/src/tests/JIT/opt/Vectorization/ReadUtf8.cs @@ -6,10 +6,12 @@ using System.Runtime.CompilerServices; using System.Text; using System.Threading; +using Xunit; -public class GetUtf8Bytes +public class ReadUtf8 { - static int Main(string[] args) + [Fact] + public static int TestEntryPoint() { // Warm up for PGO for (int i=0; i<200; i++) @@ -20,7 +22,6 @@ static int Main(string[] args) Test_SIMD(); Thread.Sleep(10); } - return 100; } diff --git a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj b/src/tests/JIT/opt/Vectorization/ReadUtf8.csproj similarity index 91% rename from src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj rename to src/tests/JIT/opt/Vectorization/ReadUtf8.csproj index e92b43a7dd71f..7cabb6617c4c8 100644 --- a/src/tests/JIT/opt/Vectorization/GetUtf8Bytes.csproj +++ b/src/tests/JIT/opt/Vectorization/ReadUtf8.csproj @@ -1,6 +1,5 @@ - Exe True From 22ca540c5747cfa4b03b1f4ee6d3f91c80004c99 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 12 Jul 2023 11:09:51 +0200 Subject: [PATCH 04/10] fix managed api --- .../src/System/Text/UTF8Encoding.Sealed.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index 2db53af377ac8..b190e1affe6c0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -153,6 +153,7 @@ private unsafe string GetStringForSmallInput(byte[] bytes) [MethodImpl(MethodImplOptions.AggressiveInlining)] public override bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) { + // returns -1 if the input couldn't be encoded or the output buffer was too small int written = ReadUtf8( ref MemoryMarshal.GetReference(chars), chars.Length, ref MemoryMarshal.GetReference(bytes), bytes.Length); @@ -171,11 +172,16 @@ ref MemoryMarshal.GetReference(chars), chars.Length, [Intrinsic] // Can be unrolled by JIT if input points to a constant string (with constant inputLength). internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) { - if (inputLength == 0 || outputLength == 0) + if (inputLength == 0) { return 0; } + if (outputLength == 0) + { + return -1; + } + fixed (char* pInput = &input) fixed (byte* pOutput = &output) { From 5530e10eb7683100f4b7b82b67dc52d21dee1136 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 12 Jul 2023 15:34:07 +0200 Subject: [PATCH 05/10] fix test --- .../src/System/Text/UTF8Encoding.Sealed.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index b190e1affe6c0..866863dc717c3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -185,7 +185,7 @@ internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte ou fixed (char* pInput = &input) fixed (byte* pOutput = &output) { - return ((UTF8EncodingSealed)UTF8).GetBytesCommon(pInput, inputLength, pOutput, outputLength); + return ((UTF8EncodingSealed)UTF8).GetBytesCommon(pInput, inputLength, pOutput, outputLength, throwForDestinationOverflow: false); } } } From 2ea7058cef6d4afbf4bbc71b6f30ad937cefb51e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 13 Jul 2023 14:57:31 +0200 Subject: [PATCH 06/10] Clean up --- .../src/System/Text/UTF8Encoding.Sealed.cs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index 866863dc717c3..5ed53d6193539 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -163,7 +163,6 @@ ref MemoryMarshal.GetReference(chars), chars.Length, bytesWritten = written; return true; } - bytesWritten = 0; return false; } @@ -172,20 +171,13 @@ ref MemoryMarshal.GetReference(chars), chars.Length, [Intrinsic] // Can be unrolled by JIT if input points to a constant string (with constant inputLength). internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) { - if (inputLength == 0) - { - return 0; - } - - if (outputLength == 0) - { - return -1; - } - fixed (char* pInput = &input) fixed (byte* pOutput = &output) { - return ((UTF8EncodingSealed)UTF8).GetBytesCommon(pInput, inputLength, pOutput, outputLength, throwForDestinationOverflow: false); + return s_default.GetBytesCommon( + pInput, inputLength, + pOutput, outputLength, + throwForDestinationOverflow: false); } } } From 452f99a0253f9442d8615d34c2e98784cf08ee2f Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 18 Jul 2023 19:01:12 +0200 Subject: [PATCH 07/10] Apply suggestions from code review Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/helperexpansion.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3bf735c290e83..704d13b80bef6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8978,7 +8978,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX switch (min(nearestPow2, REGSIZE_BYTES)) { case 1: - return TYP_BYTE; + return TYP_UBYTE; case 2: return TYP_USHORT; case 4: diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 6b08b7071e5e9..984a536599209 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1218,7 +1218,7 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion() // bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { - assert(call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC); + assert((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0); NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd); if (ni == NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) { @@ -1446,14 +1446,14 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // ----------- | \--* CNS_INT int // ----------- \--* CNS_VEC simd16 - GenTreeIntCon* offsetNode = gtNewIconNode(offset); + GenTreeIntCon* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); fgValueNumberTreeConst(offsetNode); // Grab a chunk from srcUtf8cnsData for the given offset and width GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, buffer + offset); fgValueNumberTreeConst(utf8cnsChunkNode); - GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, TYP_BYREF, gtCloneExpr(dstPtr), offsetNode); + GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, dstPtr->TypeGet(), gtCloneExpr(dstPtr), offsetNode); GenTreeOp* storeInd = gtNewStoreIndNode(maxLoadType, dstAddOffsetNode, utf8cnsChunkNode); fgInsertStmtAtEnd(fastpathBb, fgNewStmtFromTree(storeInd, debugInfo)); } From 97a731ba79d2b673744b9bf9ef0426bee38818b0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jul 2023 19:29:52 +0200 Subject: [PATCH 08/10] Address feedback --- src/coreclr/jit/compiler.h | 4 +-- src/coreclr/jit/helperexpansion.cpp | 47 ++++++++++++++++------------- src/coreclr/jit/morph.cpp | 2 +- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 704d13b80bef6..6058a4786f100 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5334,10 +5334,10 @@ class Compiler void SplitTreesRemoveCommas(); template - PhaseStatus fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrinsics = false); + PhaseStatus fgExpandHelper(bool skipRarelyRunBlocks); template - bool fgExpandHelperForBlock(BasicBlock** pBlock, bool handleIntrinsics = false); + bool fgExpandHelperForBlock(BasicBlock** pBlock); PhaseStatus fgExpandRuntimeLookups(); bool fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 371bcf4be57e9..09177efd601e9 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -116,9 +116,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); - if (!call->IsExpRuntimeLookup()) + if (!call->IsHelperCall() || !call->IsExpRuntimeLookup()) { return false; } @@ -472,8 +471,8 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); - if (!call->IsExpTLSFieldAccess()) + + if (!call->IsHelperCall() || !call->IsExpTLSFieldAccess()) { return false; } @@ -817,7 +816,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // true if there was any helper that was expanded. // template -PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrinsics) +PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) @@ -830,7 +829,7 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrin // Expand and visit the last block again to find more candidates INDEBUG(BasicBlock* origBlock = block); - while (fgExpandHelperForBlock(&block, handleIntrinsics)) + while (fgExpandHelperForBlock(&block)) { result = PhaseStatus::MODIFIED_EVERYTHING; #ifdef DEBUG @@ -862,7 +861,7 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks, bool handleIntrin // true if a helper was expanded // template -bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock, bool handleIntrinsics) +bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock) { for (Statement* const stmt : (*pBlock)->NonPhiStatements()) { @@ -874,14 +873,12 @@ bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock, bool handleIntrinsics for (GenTree* const tree : stmt->TreeList()) { - if (handleIntrinsics) + if (!tree->IsCall()) { - if (!tree->IsCall() || ((tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)) - { - continue; - } + continue; } - else if (!tree->IsHelperCall()) + + if (!tree->IsHelperCall() && ((tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)) { continue; } @@ -949,7 +946,10 @@ PhaseStatus Compiler::fgExpandStaticInit() bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); + if (!call->IsHelperCall()) + { + return false; + } bool isGc = false; StaticHelperReturnValue retValKind = {}; @@ -1208,7 +1208,7 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion() JITDUMP("Optimized for size - bail out.\n") return result; } - return fgExpandHelper<&Compiler::fgVNBasedIntrinsicExpansionForCall>(true, true); + return fgExpandHelper<&Compiler::fgVNBasedIntrinsicExpansionForCall>(true); } //------------------------------------------------------------------------------ @@ -1224,7 +1224,11 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion() // bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { - assert((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0); + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0) + { + return false; + } + NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd); if (ni == NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) { @@ -1309,14 +1313,15 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, // Read the string literal (UTF16) into a local buffer (UTF8) assert(strObj != nullptr); - BYTE buffer[MaxPossibleUnrollThreshold * 2]; + uint16_t bufferU16[MaxPossibleUnrollThreshold]; + uint8_t bufferU8[MaxPossibleUnrollThreshold]; // twice smaller because of narrowing // Both must be within [0..INT_MAX] range as we're going to cast them to int assert((unsigned)srcLenCns <= INT_MAX); assert((unsigned)strObjOffset <= INT_MAX); // getObjectContent is expected to validate the offset and length - if (!info.compCompHnd->getObjectContent(strObj, buffer, (int)srcLenCns * 2, (int)strObjOffset)) + if (!info.compCompHnd->getObjectContent(strObj, (uint8_t*)bufferU16, (int)srcLenCns * 2, (int)strObjOffset)) { JITDUMP("ReadUtf8: getObjectContent returned false.\n") return false; @@ -1325,7 +1330,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, for (unsigned charIndex = 0; charIndex < srcLenCns; charIndex++) { // Buffer keeps the original utf16 chars - uint16_t ch = ((uint16_t*)buffer)[charIndex]; + uint16_t ch = bufferU16[charIndex]; if (ch > 127) { // Only ASCII is supported. @@ -1334,7 +1339,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, } // Narrow U16 to U8 in the same buffer - buffer[charIndex] = (BYTE)ch; + bufferU8[charIndex] = (uint8_t)ch; } DebugInfo debugInfo = stmt->GetDebugInfo(); @@ -1456,7 +1461,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, fgValueNumberTreeConst(offsetNode); // Grab a chunk from srcUtf8cnsData for the given offset and width - GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, buffer + offset); + GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, bufferU8 + offset); fgValueNumberTreeConst(utf8cnsChunkNode); GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, dstPtr->TypeGet(), gtCloneExpr(dstPtr), offsetNode); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index c098291a35f03..4372355ee4084 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7712,7 +7712,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) #endif } - if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0) { if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) == NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) From d744d254b233df407f7a79184a678d231663ec8e Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 18 Jul 2023 21:32:43 +0200 Subject: [PATCH 09/10] Update helperexpansion.cpp --- src/coreclr/jit/helperexpansion.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 09177efd601e9..e8c237ae1df9c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -878,11 +878,6 @@ bool Compiler::fgExpandHelperForBlock(BasicBlock** pBlock) continue; } - if (!tree->IsHelperCall() && ((tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)) - { - continue; - } - if ((this->*ExpansionFunction)(pBlock, stmt, tree->AsCall())) { return true; From 19d2a1ac150c737ede1c1c9f3ff78d155bf4c598 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 23 Jul 2023 21:31:22 +0200 Subject: [PATCH 10/10] Update UTF8Encoding.Sealed.cs --- .../src/System/Text/UTF8Encoding.Sealed.cs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index 5ed53d6193539..31f61b16ee10e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -150,25 +150,13 @@ private unsafe string GetStringForSmallInput(byte[] bytes) } /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] public override bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) { - // returns -1 if the input couldn't be encoded or the output buffer was too small - int written = ReadUtf8( - ref MemoryMarshal.GetReference(chars), chars.Length, - ref MemoryMarshal.GetReference(bytes), bytes.Length); - - if (written >= 0) - { - bytesWritten = written; - return true; - } - bytesWritten = 0; - return false; + return base.TryGetBytes(chars, bytes, out bytesWritten); } [MethodImpl(MethodImplOptions.NoInlining)] - [Intrinsic] // Can be unrolled by JIT if input points to a constant string (with constant inputLength). + [Intrinsic] // Can be unrolled by JIT internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) { fixed (char* pInput = &input)