From ecbd835830717938917afbc9ff032a876e01f5c2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 11:24:30 -0700 Subject: [PATCH 01/12] genRegMask --- src/coreclr/jit/codegeninterface.h | 16 ++++---- src/coreclr/jit/compiler.hpp | 1 + src/coreclr/jit/lsra.cpp | 64 +++++++++++++++--------------- src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsrabuild.cpp | 12 +++--- src/coreclr/jit/lsraxarch.cpp | 4 +- src/coreclr/jit/target.h | 22 ++++++++-- 7 files changed, 69 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index a025285cbc091..608c72c22d48d 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -75,31 +75,31 @@ class CodeGenInterface } #if defined(TARGET_AMD64) - SingleTypeRegSet rbmAllFloat; - SingleTypeRegSet rbmFltCalleeTrash; + regMaskTP rbmAllFloat; + regMaskTP rbmFltCalleeTrash; - FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const + FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - SingleTypeRegSet rbmAllMask; - SingleTypeRegSet rbmMskCalleeTrash; + regMaskTP rbmAllMask; + regMaskTP rbmMskCalleeTrash; // Call this function after the equivalent fields in Compiler have been initialized. 
void CopyRegisterInfo(); - FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const + FORCEINLINE regMaskTP get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 40458c51df36c..6ea6404eb7ae4 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -989,6 +989,7 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) regNumber regNum = (regNumber)BitScanForward(mask); + mask ^= genRegMask(regNum); return regNum; } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b642d59bca05a..5671267fa4e13 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -278,7 +278,7 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo RefPosition* kill = nextKill; while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) { - if ((kill->registerAssignment & genRegMask(regRecord->regNum)) != RBM_NONE) + if ((kill->registerAssignment & genSingleTypeRegMask(regRecord->regNum)) != RBM_NONE) { nextLocation = kill->nodeLocation; break; @@ -309,7 +309,7 @@ SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, while (candidates != RBM_NONE) { regNumber regNum = genFirstRegNumFromMask(candidates); - SingleTypeRegSet candidateBit = genRegMask(regNum); + SingleTypeRegSet candidateBit = genSingleTypeRegMask(regNum); candidates ^= candidateBit; RegRecord* physRegRecord = getRegisterRecord(regNum); @@ -658,7 +658,7 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT bool LinearScan::conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition) { // Is this a fixed reference of this register? If so, there is no conflict. 
- if (refPosition->isFixedRefOfRegMask(genRegMask(regNum))) + if (refPosition->isFixedRefOfRegMask(genSingleTypeRegMask(regNum))) { return false; } @@ -3289,7 +3289,7 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL // bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord) { - regMaskTP candidateBit = genRegMask(physRegRecord->regNum); + SingleTypeRegSet candidateBit = genSingleTypeRegMask(physRegRecord->regNum); LsraLocation refLocation = refPosition->nodeLocation; // We shouldn't be calling this if we haven't already determined that the register is not // busy until the next kill. @@ -3507,7 +3507,7 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) // Assign the given physical register interval to the given interval void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) { - SingleTypeRegSet assignedRegMask = genRegMask(regRec->regNum); + SingleTypeRegSet assignedRegMask = genSingleTypeRegMask(regRec->regNum); compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); interval->assignedReg = regRec; @@ -4645,7 +4645,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) assignPhysReg(targetRegRecord, interval); } if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg && - interval->recentRefPosition->registerAssignment != genRegMask(targetReg)) + interval->recentRefPosition->registerAssignment != genSingleTypeRegMask(targetReg)) { interval->getNextRefPosition()->outOfOrder = true; } @@ -5265,7 +5265,7 @@ void LinearScan::allocateRegistersMinimal() if (assignedRegister != REG_NA) { isInRegister = true; - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genSingleTypeRegMask(assignedRegister); if (!currentInterval->isActive) { assert(!RefTypeIsUse(refType)); @@ -5450,7 +5450,7 @@ void LinearScan::allocateRegistersMinimal() // If we allocated a register, 
record it if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genSingleTypeRegMask(assignedRegister); regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); regsInUseThisLocation |= regMask; if (currentRefPosition.delayRegFree) @@ -6197,7 +6197,7 @@ void LinearScan::allocateRegisters() if (assignedRegister != REG_NA) { isInRegister = true; - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genSingleTypeRegMask(assignedRegister); if (!currentInterval->isActive) { // If this is a use, it must have started the block on the stack, but the register @@ -6668,7 +6668,7 @@ void LinearScan::allocateRegisters() // If we allocated a register, record it if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genSingleTypeRegMask(assignedRegister); regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); regsInUseThisLocation |= regMask; if (currentRefPosition.delayRegFree) @@ -8731,7 +8731,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, assert(fromReg != REG_NA); if (fromReg != REG_STK) { - freeRegs &= ~genRegMask(fromReg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); + freeRegs &= ~genSingleTypeRegMask(fromReg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); } if (toBlock != nullptr) @@ -8740,7 +8740,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, assert(toReg != REG_NA); if (toReg != REG_STK) { - freeRegs &= ~genRegMask(toReg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); + freeRegs &= ~genSingleTypeRegMask(toReg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); } } } @@ -8759,7 +8759,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, assert(reg != REG_NA); if (reg != REG_STK) { - freeRegs &= ~genRegMask(reg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); + freeRegs &= 
~genSingleTypeRegMask(reg ARM_ARG(getIntervalForLocalVar(varIndex)->registerType)); } } } @@ -9010,17 +9010,17 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) noway_assert(op1 != nullptr && op2 != nullptr); assert(op1->GetRegNum() != REG_NA && op2->GetRegNum() != REG_NA); // No floating point values, so no need to worry about the register type - // (i.e. for ARM32, where we used the genRegMask overload with a type). + // (i.e. for ARM32, where we used the genSingleTypeRegMask overload with a type). assert(varTypeIsIntegralOrI(op1) && varTypeIsIntegralOrI(op2)); - consumedRegs |= genRegMask(op1->GetRegNum()); - consumedRegs |= genRegMask(op2->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(op1->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(op2->GetRegNum()); // Special handling for GT_COPY to not resolve into the source // of switch's operand. if (op1->OperIs(GT_COPY)) { GenTree* srcOp1 = op1->gtGetOp1(); - consumedRegs |= genRegMask(srcOp1->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(srcOp1->GetRegNum()); } else if (op1->IsLocal()) { @@ -9030,7 +9030,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) if (op2->OperIs(GT_COPY)) { GenTree* srcOp2 = op2->gtGetOp1(); - consumedRegs |= genRegMask(srcOp2->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(srcOp2->GetRegNum()); } else if (op2->IsLocal()) { @@ -9058,12 +9058,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) if (lastNode->OperIs(GT_JTRUE, GT_JCMP, GT_JTEST)) { GenTree* op = lastNode->gtGetOp1(); - consumedRegs |= genRegMask(op->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(op->GetRegNum()); if (op->OperIs(GT_COPY)) { GenTree* srcOp = op->gtGetOp1(); - consumedRegs |= genRegMask(srcOp->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(srcOp->GetRegNum()); } else if (op->IsLocal()) { @@ -9074,12 +9074,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) if (lastNode->OperIs(GT_JCMP, GT_JTEST) && 
!lastNode->gtGetOp2()->isContained()) { op = lastNode->gtGetOp2(); - consumedRegs |= genRegMask(op->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(op->GetRegNum()); if (op->OperIs(GT_COPY)) { GenTree* srcOp = op->gtGetOp1(); - consumedRegs |= genRegMask(srcOp->GetRegNum()); + consumedRegs |= genSingleTypeRegMask(srcOp->GetRegNum()); } else if (op->IsLocal()) { @@ -12830,7 +12830,7 @@ void LinearScan::RegisterSelection::try_BEST_FIT() for (SingleTypeRegSet bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); - SingleTypeRegSet bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); + SingleTypeRegSet bestFitCandidateBit = genSingleTypeRegMask(bestFitCandidateRegNum); bestFitCandidates ^= bestFitCandidateBit; // Find the next RefPosition of the register. @@ -12929,7 +12929,7 @@ void LinearScan::RegisterSelection::try_REG_ORDER() for (SingleTypeRegSet regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); - SingleTypeRegSet regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); + SingleTypeRegSet regOrderCandidateBit = SingleTypeRegSet(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; @@ -12965,7 +12965,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() for (SingleTypeRegSet spillCandidates = candidates; spillCandidates != RBM_NONE;) { regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); - SingleTypeRegSet spillCandidateBit = genRegMask(spillCandidateRegNum); + SingleTypeRegSet spillCandidateBit = genSingleTypeRegMask(spillCandidateRegNum); spillCandidates ^= spillCandidateBit; RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; @@ -13090,7 +13090,7 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() for 
(SingleTypeRegSet farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); - SingleTypeRegSet farthestCandidateBit = genRegMask(farthestCandidateRegNum); + SingleTypeRegSet farthestCandidateBit = genSingleTypeRegMask(farthestCandidateRegNum); farthestCandidates ^= farthestCandidateBit; // Find the next RefPosition of the register. @@ -13123,7 +13123,7 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() for (SingleTypeRegSet prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); - SingleTypeRegSet prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); + SingleTypeRegSet prevRegOptCandidateBit = genSingleTypeRegMask(prevRegOptCandidateRegNum); prevRegOptCandidates ^= prevRegOptCandidateBit; Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; bool foundPrevRegOptReg = true; @@ -13226,7 +13226,7 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate for (; coversCandidates != RBM_NONE;) { regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); + SingleTypeRegSet coversCandidateBit = genSingleTypeRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // The register is considered unassigned if it has no assignedInterval, OR @@ -13254,7 +13254,7 @@ void LinearScan::RegisterSelection::calculateCoversSets() for (; coversCandidates != RBM_NONE;) { regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); + SingleTypeRegSet coversCandidateBit = genSingleTypeRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // If we have a single candidate we don't need to compute the 
preference-related sets, but we @@ -13565,7 +13565,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); + SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13590,7 +13590,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* if (!found && (currentInterval->assignedReg != nullptr)) { RegRecord* prevRegRec = currentInterval->assignedReg; - prevRegBit = genRegMask(prevRegRec->regNum); + prevRegBit = genSingleTypeRegMask(prevRegRec->regNum); if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE)) { if (!needsConsecutiveRegisters) @@ -13884,7 +13884,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); + SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e20a92a695426..a518c4621e491 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2387,7 +2387,7 @@ class Interval : public Referenceable // SingleTypeRegSet getCurrentPreferences() { - return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum); + return (assignedReg == nullptr) ? 
registerPreferences : genSingleTypeRegMask(assignedReg->regNum); } void mergeRegisterPreferences(SingleTypeRegSet preferences) @@ -2659,7 +2659,7 @@ class RefPosition { referent = r; isPhysRegRef = true; - registerAssignment = genRegMask(r->regNum); + registerAssignment = genSingleTypeRegMask(r->regNum); } regNumber assignedReg() diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3be436c53b018..4a0009351139b 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2870,7 +2870,7 @@ void LinearScan::buildInitialParamDef(const LclVarDsc* varDsc, regNumber paramRe { // Set this interval as currently assigned to that register assert(paramReg < REG_COUNT); - mask = genRegMask(paramReg); + mask = genSingleTypeRegMask(paramReg); assignPhysReg(paramReg, interval); INDEBUG(registersToDump |= getRegMask(paramReg, interval->registerType)); } @@ -2933,7 +2933,7 @@ void LinearScan::stressSetRandomParameterPreferences() } *regs &= ~genRegMask(prefReg); - interval->mergeRegisterPreferences(genRegMask(prefReg)); + interval->mergeRegisterPreferences(genSingleTypeRegMask(prefReg)); } } @@ -3085,7 +3085,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, if (!tree->IsMultiRegNode() || (multiRegIdx == 0)) { assert((dstCandidates == RBM_NONE) || (dstCandidates == genRegMask(tree->GetRegNum()))); - dstCandidates = genRegMask(tree->GetRegNum()); + dstCandidates = genSingleTypeRegMask(tree->GetRegNum()); } else { @@ -3166,7 +3166,7 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida assert(dstCandidates.IsRegNumInMask(thisReg)); dstCandidates.RemoveRegNumFromMask(thisReg); - BuildDef(tree, genRegMask(thisReg), i); + BuildDef(tree, genSingleTypeRegMask(thisReg), i); } } @@ -4379,7 +4379,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. 
- SingleTypeRegSet argMask = genRegMask(argReg); + SingleTypeRegSet argMask = genSingleTypeRegMask(argReg); RefPosition* use = BuildUse(op1, argMask); // Record that this register is occupied by a register now. @@ -4457,7 +4457,7 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c regNumber argReg = argNode->GetRegNum(); regNumber targetReg = compiler->getCallArgIntRegister(argReg); - buildInternalIntRegisterDefForNode(call, genRegMask(targetReg)); + buildInternalIntRegisterDefForNode(call, genSingleTypeRegMask(targetReg)); } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index fe21be0ec8938..d3a7f075fdd08 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1259,7 +1259,7 @@ int LinearScan::BuildCall(GenTreeCall* call) if (argNode->OperIsPutArgReg()) { srcCount++; - BuildUse(argNode, genRegMask(argNode->GetRegNum())); + BuildUse(argNode, genSingleTypeRegMask(argNode->GetRegNum())); } #ifdef UNIX_AMD64_ABI else if (argNode->OperGet() == GT_FIELD_LIST) @@ -1268,7 +1268,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { assert(use.GetNode()->OperIsPutArgReg()); srcCount++; - BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + BuildUse(use.GetNode(), genSingleTypeRegMask(use.GetNode()->GetRegNum())); } } #endif // UNIX_AMD64_ABI diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 7e144eb9c5b21..9614540803143 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -229,6 +229,8 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif +struct regMaskTP; + typedef regMaskSmall SingleTypeRegSet; struct regMaskTP @@ -739,7 +741,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) extern const regMaskSmall regMasks[REG_COUNT]; -inline SingleTypeRegSet genRegMask(regNumber reg) +inline regMaskTP genRegMask(regNumber reg) { assert((unsigned)reg < ArrLen(regMasks)); #ifdef TARGET_AMD64 @@ -747,7 +749,7 
@@ inline SingleTypeRegSet genRegMask(regNumber reg) // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK // and the result needs to be zero. - SingleTypeRegSet result = 1ULL << reg; + regMaskTP result = 1ULL << reg; assert(result == regMasks[reg]); return result; #else @@ -803,10 +805,10 @@ inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* // For registers that are used in pairs, the caller will be handling // each member of the pair separately. // -inline SingleTypeRegSet genRegMask(regNumber regNum, var_types type) +inline regMaskTP genRegMask1(regNumber regNum, var_types type) { #if defined(TARGET_ARM) - SingleTypeRegSet regMask = RBM_NONE; + regMaskTP regMask = RBM_NONE; if (varTypeUsesIntReg(type)) { @@ -824,6 +826,18 @@ inline SingleTypeRegSet genRegMask(regNumber regNum, var_types type) #endif } + +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) +{ + return genRegMask(reg).getLow(); +} + + +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) +{ + return genRegMask(reg).getLow(); +} + /***************************************************************************** * * These arrays list the callee-saved register numbers (and bitmaps, respectively) for From ee0b7b66b0a2c54d8bdd19c157271879ee07fa4f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 11:50:49 -0700 Subject: [PATCH 02/12] Make genRegMask() return regMaskTP, introduce genSingleTypeRegMask() for LSRA --- src/coreclr/jit/lsra.h | 37 +++++++++++++++++++++++++++++++- src/coreclr/jit/lsraarm.cpp | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/lsraarmarch.cpp | 23 ++++++++++---------- src/coreclr/jit/lsrabuild.cpp | 8 ++++--- src/coreclr/jit/regMaskTPOps.cpp | 2 +- src/coreclr/jit/target.h | 21 +++++------------- 7 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/lsra.h 
b/src/coreclr/jit/lsra.h index a518c4621e491..42c399959b05f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -72,6 +72,40 @@ inline bool useFloatReg(var_types type) return (regType(type) == FloatRegisterType); } +//------------------------------------------------------------------------ +// genSingleTypeRegMask: Given a register, generate the appropriate regMask +// +// Arguments: +// regNum - the register of interest +// +// Return Value: +// This will usually return the same value as genRegMask(regNum), except +// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. +// +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) +{ + return genRegMask(reg).getLow(); +} + +//------------------------------------------------------------------------ +// genSingleTypeRegMask: Given a register, generate the appropriate regMask +// +// Arguments: +// regNum - the register of interest +// type - the type of regNum (i.e. the type it is being used as) +// +// Return Value: +// This will usually return the same value as genRegMask(regNum), except +// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. +// On architectures where multiple registers are used for certain types +// (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes +// all the registers for that type. +// +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg, var_types type) +{ + return genRegMask(reg).getLow(); +} + //------------------------------------------------------------------------ // RefInfo: Captures the necessary information for a definition that is "in-flight" // during `buildIntervals` (i.e. a tree-node definition has been encountered, @@ -2338,7 +2372,8 @@ class Interval : public Referenceable { // This uses regMasks to handle the case where a double actually occupies two registers // TODO-Throughput: This could/should be done more cheaply. 
- return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE); + return (physReg != REG_NA && + (genSingleTypeRegMask(physReg, registerType) & genSingleTypeRegMask(regNum)) != RBM_NONE); } // Assign the related interval. diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index f2c60cde13eb0..fc77279eabb75 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -670,7 +670,7 @@ int LinearScan::BuildNode(GenTree* tree) SingleTypeRegSet argMask = RBM_NONE; if (argReg != REG_COUNT) { - argMask = genRegMask(argReg); + argMask = genSingleTypeRegMask(argReg); } // If type of node is `long` then it is actually `double`. @@ -679,7 +679,7 @@ int LinearScan::BuildNode(GenTree* tree) { dstCount++; assert(genRegArgNext(argReg) == REG_NEXT(argReg)); - argMask |= genRegMask(REG_NEXT(argReg)); + argMask |= genSingleTypeRegMask(REG_NEXT(argReg)); dstCount = 2; } if (!tree->gtGetOp1()->isContained()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5283d2fc00fff..3cf85f2653592 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -103,7 +103,7 @@ void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNu #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE INDEBUG(refPosCount++); assert((consecutiveRefPosition->refType == RefTypeDef) || (consecutiveRefPosition->refType == RefTypeUse)); - consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); + consecutiveRefPosition->registerAssignment = genSingleTypeRegMask(regToAssign); consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); regToAssign = regToAssign == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(regToAssign); } diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 85f1f66442404..323dea8d4809a 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -174,7 +174,8 @@ int LinearScan::BuildCall(GenTreeCall* call) ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH & ~RBM_LR; if (compiler->getNeedsGSSecurityCookie()) { - ctrlExprCandidates &= ~(genRegMask(REG_GSCOOKIE_TMP_0) | genRegMask(REG_GSCOOKIE_TMP_1)); + ctrlExprCandidates &= + ~(genSingleTypeRegMask(REG_GSCOOKIE_TMP_0) | genSingleTypeRegMask(REG_GSCOOKIE_TMP_1)); } assert(ctrlExprCandidates != RBM_NONE); } @@ -291,7 +292,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } #endif // TARGET_ARM #endif - BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + BuildUse(use.GetNode(), genSingleTypeRegMask(use.GetNode()->GetRegNum())); srcCount++; } } @@ -301,7 +302,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(regCount == abiInfo.NumRegs); for (unsigned int i = 0; i < regCount; i++) { - BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + BuildUse(argNode, genSingleTypeRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); } srcCount += regCount; } @@ -317,14 +318,14 @@ int LinearScan::BuildCall(GenTreeCall* call) if (argNode->TypeGet() == TYP_LONG) { assert(argNode->IsMultiRegNode()); - BuildUse(argNode, genRegMask(argNode->GetRegNum()), 0); - BuildUse(argNode, genRegMask(genRegArgNext(argNode->GetRegNum())), 1); + BuildUse(argNode, genSingleTypeRegMask(argNode->GetRegNum()), 0); + BuildUse(argNode, genSingleTypeRegMask(genRegArgNext(argNode->GetRegNum())), 1); srcCount += 2; } else #endif // TARGET_ARM { - BuildUse(argNode, genRegMask(argNode->GetRegNum())); + BuildUse(argNode, genSingleTypeRegMask(argNode->GetRegNum())); srcCount++; } } @@ -384,9 +385,9 @@ int LinearScan::BuildCall(GenTreeCall* call) // that we will attach to this node to guarantee that they are 
available // during generating this node. assert(call->gtFlags & GTF_TLS_GET_ADDR); - newRefPosition(REG_R0, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R0)); - newRefPosition(REG_R1, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R1)); - ctrlExprCandidates = genRegMask(REG_R2); + newRefPosition(REG_R0, currentLoc, RefTypeFixedReg, nullptr, genSingleTypeRegMask(REG_R0)); + newRefPosition(REG_R1, currentLoc, RefTypeFixedReg, nullptr, genSingleTypeRegMask(REG_R1)); + ctrlExprCandidates = genSingleTypeRegMask(REG_R2); } #endif BuildUse(ctrlExpr, ctrlExprCandidates); @@ -541,7 +542,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genRegMask(thisArgReg); + argMask |= genSingleTypeRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || @@ -582,7 +583,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { - sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); } sourceRegCount++; BuildUse(node, sourceMask, regIndex); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 4a0009351139b..e18b4a2cf5460 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4225,7 +4225,7 @@ int LinearScan::BuildReturn(GenTree* tree) { hasMismatchedRegTypes = true; SingleTypeRegSet dstRegMask = - genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); + genSingleTypeRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); if (varTypeUsesIntReg(dstType)) { @@ -4252,7 +4252,9 @@ int LinearScan::BuildReturn(GenTree* tree) if (!hasMismatchedRegTypes || 
(regType(op1->AsLclVar()->GetFieldTypeByIndex(compiler, i)) == regType(retTypeDesc.GetReturnRegType(i)))) { - BuildUse(op1, genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)), i); + BuildUse(op1, + genSingleTypeRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)), + i); } else { @@ -4411,7 +4413,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) if (node->TypeGet() == TYP_LONG) { srcCount++; - SingleTypeRegSet argMaskHi = genRegMask(REG_NEXT(argReg)); + SingleTypeRegSet argMaskHi = genSingleTypeRegMask(REG_NEXT(argReg)); assert(genRegArgNext(argReg) == REG_NEXT(argReg)); use = BuildUse(op1, argMaskHi, 1); BuildDef(node, argMask, 0); diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp index 86de50a08cb95..30654e34a34e8 100644 --- a/src/coreclr/jit/regMaskTPOps.cpp +++ b/src/coreclr/jit/regMaskTPOps.cpp @@ -13,7 +13,7 @@ struct regMaskTP; // void regMaskTP::RemoveRegNumFromMask(regNumber reg) { - low &= ~genRegMask(reg); + low &= ~genSingleTypeRegMask(reg); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 9614540803143..74805c706e74c 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -510,8 +510,9 @@ inline bool isByteReg(regNumber reg) } #endif -inline SingleTypeRegSet genRegMask(regNumber reg); -inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); +inline regMaskTP genRegMask(regNumber reg); +inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); /***************************************************************************** * Return true if the register number is valid @@ -762,7 +763,7 @@ inline regMaskTP genRegMask(regNumber reg) * Map a register number to a floating-point register mask. 
*/ -inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) +inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) { #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ defined(TARGET_RISCV64) @@ -805,7 +806,7 @@ inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* // For registers that are used in pairs, the caller will be handling // each member of the pair separately. // -inline regMaskTP genRegMask1(regNumber regNum, var_types type) +inline regMaskTP genRegMask(regNumber regNum, var_types type) { #if defined(TARGET_ARM) regMaskTP regMask = RBM_NONE; @@ -826,18 +827,6 @@ inline regMaskTP genRegMask1(regNumber regNum, var_types type) #endif } - -inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) -{ - return genRegMask(reg).getLow(); -} - - -inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) -{ - return genRegMask(reg).getLow(); -} - /***************************************************************************** * * These arrays list the callee-saved register numbers (and bitmaps, respectively) for From bd73246dbfe35393e5c09171518df0e2d271897f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 12:15:56 -0700 Subject: [PATCH 03/12] Make allFloat,allMask regMaskTP instead of `SingleTypeRegSet` so no effect on non-LSRA code --- src/coreclr/jit/compiler.cpp | 4 ++-- src/coreclr/jit/compiler.h | 26 +++++++++++++------------- src/coreclr/jit/lsra.cpp | 14 +++++++------- src/coreclr/jit/lsra.h | 26 +++++++++++++------------- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/target.h | 16 +++++++++++++++- 6 files changed, 51 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1b5592a5b59e5..54be15ed7f20b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3485,12 +3485,12 @@ void 
Compiler::compInitOptions(JitFlags* jitFlags) // Make sure we copy the register info and initialize the // trash regs after the underlying fields are initialized - const SingleTypeRegSet vtCalleeTrashRegs[TYP_COUNT]{ + const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{ #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP }; - memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); codeGen->CopyRegisterInfo(); #endif // TARGET_XARCH diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 76df03352c55a..73909549f60f1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11246,8 +11246,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // SingleTypeRegSet get_RBM_ALLFLOAT(); - // SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH(); + // regMaskTP get_RBM_ALLFLOAT(); + // regMaskTP get_RBM_FLT_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_FLOAT(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11256,16 +11256,16 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_AMD64 requires one. 
// - SingleTypeRegSet rbmAllFloat; - SingleTypeRegSet rbmFltCalleeTrash; + regMaskTP rbmAllFloat; + regMaskTP rbmFltCalleeTrash; unsigned cntCalleeTrashFloat; public: - FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const + FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } @@ -11284,8 +11284,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // SingleTypeRegSet get_RBM_ALLMASK(); - // SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH(); + // regMaskTP get_RBM_ALLMASK(); + // regMaskTP get_RBM_MSK_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_MASK(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11294,17 +11294,17 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_XARCH requires one. // - SingleTypeRegSet rbmAllMask; - SingleTypeRegSet rbmMskCalleeTrash; + regMaskTP rbmAllMask; + regMaskTP rbmMskCalleeTrash; unsigned cntCalleeTrashMask; - SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; + regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; public: - FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const + FORCEINLINE regMaskTP get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5671267fa4e13..d54be6d94c998 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -448,7 +448,7 @@ SingleTypeRegSet LinearScan::internalFloatRegCandidates() } else { - return RBM_FLT_CALLEE_TRASH; + return RBM_FLT_CALLEE_TRASH.GetFloatRegSet(); } } @@ -597,7 +597,7 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case 
LSRA_LIMIT_CALLER: { - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); } break; @@ -784,7 +784,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_XARCH) rbmAllMask = compiler->rbmAllMask; rbmMskCalleeTrash = compiler->rbmMskCalleeTrash; - memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); if (!compiler->canUseEvexEncoding()) { @@ -848,10 +848,10 @@ LinearScan::LinearScan(Compiler* theCompiler) availableIntRegs &= ~RBM_FPBASE; #endif // ETW_EBP_FRAMED - availableFloatRegs = RBM_ALLFLOAT; - availableDoubleRegs = RBM_ALLDOUBLE; + availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); + availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - availableMaskRegs = RBM_ALLMASK; + availableMaskRegs = RBM_ALLMASK.GetPredicateRegSet(); #endif #if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -8782,7 +8782,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, // Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. 
if ((freeRegs & RBM_CALLEE_TRASH) != 0) { - freeRegs &= RBM_CALLEE_TRASH; + freeRegs &= RBM_CALLEE_TRASH.GetRegSetForType(type); } regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs)); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 42c399959b05f..bca5930b6df30 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2131,28 +2131,28 @@ class LinearScan : public LinearScanInterface int BuildLclHeap(GenTree* tree); #if defined(TARGET_AMD64) - SingleTypeRegSet rbmAllFloat; - SingleTypeRegSet rbmFltCalleeTrash; + regMaskTP rbmAllFloat; + regMaskTP rbmFltCalleeTrash; - FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const + FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - SingleTypeRegSet rbmAllMask; - SingleTypeRegSet rbmMskCalleeTrash; + regMaskTP rbmAllMask; + regMaskTP rbmMskCalleeTrash; - FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const + FORCEINLINE regMaskTP get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -2173,21 +2173,21 @@ class LinearScan : public LinearScanInterface // static FORCEINLINE SingleTypeRegSet calleeSaveRegs(RegisterType rt) { - static const SingleTypeRegSet varTypeCalleeSaveRegs[] = { + static const regMaskTP varTypeCalleeSaveRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) csr, #include "typelist.h" #undef DEF_TP }; assert((unsigned)rt < ArrLen(varTypeCalleeSaveRegs)); - return varTypeCalleeSaveRegs[rt]; + return varTypeCalleeSaveRegs[rt].GetRegSetForType(rt); } #if defined(TARGET_XARCH) // Not all of the callee trash values are constant, so don't declare 
this as a method local static // doing so results in significantly more complex codegen and we'd rather just initialize this once // as part of initializing LSRA instead - SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; + regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; #endif // TARGET_XARCH //------------------------------------------------------------------------ @@ -2196,7 +2196,7 @@ class LinearScan : public LinearScanInterface FORCEINLINE SingleTypeRegSet callerSaveRegs(RegisterType rt) const { #if !defined(TARGET_XARCH) - static const SingleTypeRegSet varTypeCalleeTrashRegs[] = { + static const regMaskTP varTypeCalleeTrashRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP @@ -2204,7 +2204,7 @@ class LinearScan : public LinearScanInterface #endif // !TARGET_XARCH assert((unsigned)rt < ArrLen(varTypeCalleeTrashRegs)); - return varTypeCalleeTrashRegs[rt]; + return varTypeCalleeTrashRegs[rt].GetRegSetForType(rt); } }; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e18b4a2cf5460..8a2951ba1f257 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4282,7 +4282,7 @@ int LinearScan::BuildReturn(GenTree* tree) break; case TYP_DOUBLE: // We ONLY want the valid double register in the RBM_DOUBLERET mask. 
- useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); + useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); break; case TYP_LONG: useCandidates = RBM_LNGRET; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 74805c706e74c..ef3c57f5663eb 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -229,7 +229,6 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif -struct regMaskTP; typedef regMaskSmall SingleTypeRegSet; @@ -295,6 +294,21 @@ struct regMaskTP return getLow(); } + SingleTypeRegSet GetIntRegSet() const + { + return getLow(); + } + + SingleTypeRegSet GetFloatRegSet() const + { + return getLow(); + } + + SingleTypeRegSet GetPredicateRegSet() const + { + return getLow(); + } + void RemoveRegNumFromMask(regNumber reg); bool IsRegNumInMask(regNumber reg); From fb14704db38de56689e8495ab6bdff2274685950 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 12:17:33 -0700 Subject: [PATCH 04/12] jit format --- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/lsra.cpp | 7 ++++--- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/target.h | 5 ++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 73909549f60f1..96c8c2b500bbf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11258,7 +11258,7 @@ class Compiler // regMaskTP rbmAllFloat; regMaskTP rbmFltCalleeTrash; - unsigned cntCalleeTrashFloat; + unsigned cntCalleeTrashFloat; public: FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const @@ -11296,7 +11296,7 @@ class Compiler // regMaskTP rbmAllMask; regMaskTP rbmMskCalleeTrash; - unsigned cntCalleeTrashMask; + unsigned cntCalleeTrashMask; regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; public: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d54be6d94c998..a5493f28e71f5 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -597,7 +597,8 @@ SingleTypeRegSet 
LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLER: { - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), + minRegCount); } break; @@ -3289,8 +3290,8 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL // bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord) { - SingleTypeRegSet candidateBit = genSingleTypeRegMask(physRegRecord->regNum); - LsraLocation refLocation = refPosition->nodeLocation; + SingleTypeRegSet candidateBit = genSingleTypeRegMask(physRegRecord->regNum); + LsraLocation refLocation = refPosition->nodeLocation; // We shouldn't be calling this if we haven't already determined that the register is not // busy until the next kill. assert(!isRegBusy(physRegRecord->regNum, current->registerType)); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index bca5930b6df30..88919bc52c36d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2145,7 +2145,7 @@ class LinearScan : public LinearScanInterface #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; + regMaskTP rbmAllMask; regMaskTP rbmMskCalleeTrash; FORCEINLINE regMaskTP get_RBM_ALLMASK() const diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index ef3c57f5663eb..7e8a30bfa79c1 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -229,7 +229,6 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif - typedef regMaskSmall SingleTypeRegSet; struct regMaskTP @@ -524,8 +523,8 @@ inline bool isByteReg(regNumber reg) } #endif -inline regMaskTP genRegMask(regNumber reg); -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); +inline regMaskTP genRegMask(regNumber reg); +inline regMaskTP 
genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); /***************************************************************************** From da02969af89810cdc1314ffe9d2070fc85863f0f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 12:26:38 -0700 Subject: [PATCH 05/12] fix build errors --- src/coreclr/jit/lsra.cpp | 22 +++++++++++++++++++++- src/coreclr/jit/lsrabuild.cpp | 4 ++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index a5493f28e71f5..871fab3d8ddcc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -448,7 +448,11 @@ SingleTypeRegSet LinearScan::internalFloatRegCandidates() } else { +#ifdef TARGET_AMD64 return RBM_FLT_CALLEE_TRASH.GetFloatRegSet(); +#else + return RBM_FLT_CALLEE_TRASH; +#endif // TARGET_AMD64 } } @@ -597,8 +601,12 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLER: { +#ifdef TARGET_AMD64 mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); +#else + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); +#endif // TARGET_AMD64 } break; @@ -849,10 +857,18 @@ LinearScan::LinearScan(Compiler* theCompiler) availableIntRegs &= ~RBM_FPBASE; #endif // ETW_EBP_FRAMED +#ifdef TARGET_AMD64 availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#else + availableFloatRegs = RBM_ALLFLOAT; + availableDoubleRegs = RBM_ALLDOUBLE; +#endif + +#if defined(TARGET_XARCH) availableMaskRegs = RBM_ALLMASK.GetPredicateRegSet(); +#elif defined(TARGET_ARM64) + availableMaskRegs = RBM_ALLMASK; #endif #if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -8783,7 +8799,11 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, 
// Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. if ((freeRegs & RBM_CALLEE_TRASH) != 0) { +#ifdef TARGET_AMD64 freeRegs &= RBM_CALLEE_TRASH.GetRegSetForType(type); +#else + freeRegs &= RBM_CALLEE_TRASH; +#endif } regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs)); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 8a2951ba1f257..0489c617dea10 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4282,7 +4282,11 @@ int LinearScan::BuildReturn(GenTree* tree) break; case TYP_DOUBLE: // We ONLY want the valid double register in the RBM_DOUBLERET mask. +#ifdef TARGET_AMD64 useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); +#else + useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); +#endif // TARGET_AMD64 break; case TYP_LONG: useCandidates = RBM_LNGRET; From 6d9e7b8ab6a6f420bf515c9179625312cdcb58ce Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 12:49:18 -0700 Subject: [PATCH 06/12] fix loongarch and risc --- src/coreclr/jit/compiler.hpp | 1 - src/coreclr/jit/lsra.cpp | 4 ++-- src/coreclr/jit/lsraloongarch64.cpp | 10 +++++----- src/coreclr/jit/lsrariscv64.cpp | 13 +++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 6ea6404eb7ae4..40458c51df36c 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -989,7 +989,6 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) regNumber regNum = (regNumber)BitScanForward(mask); - mask ^= genRegMask(regNum); return regNum; } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 871fab3d8ddcc..9512edf88574f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -601,7 +601,7 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLER: { -#ifdef TARGET_AMD64 +#ifdef TARGET_XARCH mask = 
getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); #else @@ -8799,7 +8799,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, // Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. if ((freeRegs & RBM_CALLEE_TRASH) != 0) { -#ifdef TARGET_AMD64 +#ifdef TARGET_XARCH freeRegs &= RBM_CALLEE_TRASH.GetRegSetForType(type); #else freeRegs &= RBM_CALLEE_TRASH; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index c77b8d5c557d3..128cbf0356c14 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -794,7 +794,7 @@ int LinearScan::BuildCall(GenTreeCall* call) #ifdef DEBUG assert(use.GetNode()->OperIs(GT_PUTARG_REG)); #endif - BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + BuildUse(use.GetNode(), genSingleTypeRegMask(use.GetNode()->GetRegNum())); srcCount++; } } @@ -804,7 +804,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(regCount == abiInfo.NumRegs); for (unsigned int i = 0; i < regCount; i++) { - BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + BuildUse(argNode, genSingleTypeRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); } srcCount += regCount; } @@ -814,7 +814,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(argNode->GetRegNum() == argReg); HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); { - BuildUse(argNode, genRegMask(argNode->GetRegNum())); + BuildUse(argNode, genSingleTypeRegMask(argNode->GetRegNum())); srcCount++; } } @@ -999,7 +999,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genRegMask(thisArgReg); + argMask |= genSingleTypeRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } @@ -1026,7 +1026,7 @@ int 
LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) regMaskTP sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { - sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); } sourceRegCount++; BuildUse(node, sourceMask, 0); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index ebea9cce71472..2983c035fe8d1 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -885,7 +885,8 @@ int LinearScan::BuildCall(GenTreeCall* call) ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; if (compiler->getNeedsGSSecurityCookie()) { - ctrlExprCandidates &= ~(genRegMask(REG_GSCOOKIE_TMP_0) | genRegMask(REG_GSCOOKIE_TMP_1)); + ctrlExprCandidates &= + ~(genSingleTypeRegMask(REG_GSCOOKIE_TMP_0) | genSingleTypeRegMask(REG_GSCOOKIE_TMP_1)); } assert(ctrlExprCandidates != RBM_NONE); } @@ -957,7 +958,7 @@ int LinearScan::BuildCall(GenTreeCall* call) #ifdef DEBUG assert(use.GetNode()->OperIs(GT_PUTARG_REG)); #endif - BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + BuildUse(use.GetNode(), genSingleTypeRegMask(use.GetNode()->GetRegNum())); srcCount++; } } @@ -967,7 +968,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(regCount == abiInfo.NumRegs); for (unsigned int i = 0; i < regCount; i++) { - BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + BuildUse(argNode, genSingleTypeRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); } srcCount += regCount; } @@ -977,7 +978,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(argNode->GetRegNum() == argReg); HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); { - BuildUse(argNode, genRegMask(argNode->GetRegNum())); + BuildUse(argNode, genSingleTypeRegMask(argNode->GetRegNum())); srcCount++; } } @@ -1149,7 +1150,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) for (unsigned i 
= 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genRegMask(thisArgReg); + argMask |= genSingleTypeRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || @@ -1181,7 +1182,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { - sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); } sourceRegCount++; BuildUse(node, sourceMask, regIndex); From 53b93865ca42d8594e24b8b024447f26f53be81a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 13:04:13 -0700 Subject: [PATCH 07/12] fix a typo --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9512edf88574f..5ef4d06942cfa 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12950,7 +12950,7 @@ void LinearScan::RegisterSelection::try_REG_ORDER() for (SingleTypeRegSet regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); - SingleTypeRegSet regOrderCandidateBit = SingleTypeRegSet(regOrderCandidateRegNum); + SingleTypeRegSet regOrderCandidateBit = genSingleTypeRegMask(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; From 96bcbf4d33dca31f1ebbfd8cc5fe3a9cfc5a5d3b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 14:01:54 -0700 Subject: [PATCH 08/12] move more `genSingleTypeRegMask()` --- src/coreclr/jit/lsra.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp 
b/src/coreclr/jit/lsra.cpp index 5ef4d06942cfa..709f6ea0f3bf3 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5326,7 +5326,7 @@ void LinearScan::allocateRegistersMinimal() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) + else if ((genSingleTypeRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { currentRefPosition.registerAssignment = assignedRegBit; if (!currentInterval->isActive) @@ -5419,7 +5419,8 @@ void LinearScan::allocateRegistersMinimal() if (currentRefPosition.isFixedRegRef && !currentInterval->isActive && (currentInterval->assignedReg != nullptr) && (currentInterval->assignedReg->assignedInterval == currentInterval) && - (genRegMask(currentInterval->assignedReg->regNum) != currentRefPosition.registerAssignment)) + (genSingleTypeRegMask(currentInterval->assignedReg->regNum) != + currentRefPosition.registerAssignment)) { unassignPhysReg(currentInterval->assignedReg, nullptr); } @@ -6170,7 +6171,7 @@ void LinearScan::allocateRegisters() // kill would lead to spill of source but not the putarg_reg if it were treated // as special. 
if (srcInterval->isActive && - genRegMask(srcInterval->physReg) == currentRefPosition.registerAssignment && + genSingleTypeRegMask(srcInterval->physReg) == currentRefPosition.registerAssignment && currentInterval->getNextRefLocation() == nextFixedRef[srcInterval->physReg]) { assert(physRegRecord->regNum == srcInterval->physReg); @@ -6256,9 +6257,9 @@ void LinearScan::allocateRegisters() // it might be beneficial to keep it in this reg for PART of the lifetime if (currentInterval->isLocalVar) { - regMaskTP preferences = currentInterval->registerPreferences; + SingleTypeRegSet preferences = currentInterval->registerPreferences; bool keepAssignment = true; - bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE; + bool matchesPreferences = (preferences & genSingleTypeRegMask(assignedRegister)) != RBM_NONE; // Will the assigned register cover the lifetime? If not, does it at least // meet the preferences for the next RefPosition? @@ -6339,7 +6340,7 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) + else if ((genSingleTypeRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) @@ -6617,7 +6618,7 @@ void LinearScan::allocateRegisters() if (currentRefPosition.isFixedRegRef && !currentInterval->isActive && (currentInterval->assignedReg != nullptr) && (currentInterval->assignedReg->assignedInterval == currentInterval) && - (genRegMask(currentInterval->assignedReg->regNum) != currentRefPosition.registerAssignment)) + (genSingleTypeRegMask(currentInterval->assignedReg->regNum) != currentRefPosition.registerAssignment)) { unassignPhysReg(currentInterval->assignedReg, nullptr); } @@ -8468,7 +8469,7 @@ void 
LinearScan::resolveRegisters() varDsc->lvOnFrame = false; } #ifdef DEBUG - regMaskTP registerAssignment = genRegMask(varDsc->GetRegNum()); + regMaskTP registerAssignment = genSingleTypeRegMask(varDsc->GetRegNum()); assert(!interval->isSpilled && !interval->isSplit); RefPosition* refPosition = interval->firstRefPosition; assert(refPosition != nullptr); From 4890c2c6fbf53cefe6e92ac23568dcbfd62454d0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 14:20:09 -0700 Subject: [PATCH 09/12] jit format --- src/coreclr/jit/lsra.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 709f6ea0f3bf3..08c30882d59d0 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -6257,9 +6257,9 @@ void LinearScan::allocateRegisters() // it might be beneficial to keep it in this reg for PART of the lifetime if (currentInterval->isLocalVar) { - SingleTypeRegSet preferences = currentInterval->registerPreferences; - bool keepAssignment = true; - bool matchesPreferences = (preferences & genSingleTypeRegMask(assignedRegister)) != RBM_NONE; + SingleTypeRegSet preferences = currentInterval->registerPreferences; + bool keepAssignment = true; + bool matchesPreferences = (preferences & genSingleTypeRegMask(assignedRegister)) != RBM_NONE; // Will the assigned register cover the lifetime? If not, does it at least // meet the preferences for the next RefPosition? 
@@ -6618,7 +6618,8 @@ void LinearScan::allocateRegisters() if (currentRefPosition.isFixedRegRef && !currentInterval->isActive && (currentInterval->assignedReg != nullptr) && (currentInterval->assignedReg->assignedInterval == currentInterval) && - (genSingleTypeRegMask(currentInterval->assignedReg->regNum) != currentRefPosition.registerAssignment)) + (genSingleTypeRegMask(currentInterval->assignedReg->regNum) != + currentRefPosition.registerAssignment)) { unassignPhysReg(currentInterval->assignedReg, nullptr); } From 0057bb3928ea9f996f67085c1dcdbe1dcd47c9e9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 15:10:24 -0700 Subject: [PATCH 10/12] Make genRegMask() use genSingleType*() --- src/coreclr/jit/lsra.h | 64 ++++++++++++++++++++++++++++++++++++++-- src/coreclr/jit/target.h | 63 +++++++-------------------------------- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 88919bc52c36d..a01638efb5301 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -72,6 +72,8 @@ inline bool useFloatReg(var_types type) return (regType(type) == FloatRegisterType); } +extern const regMaskSmall regMasks[REG_COUNT]; + //------------------------------------------------------------------------ // genSingleTypeRegMask: Given a register, generate the appropriate regMask // @@ -84,7 +86,18 @@ inline bool useFloatReg(var_types type) // inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) { - return genRegMask(reg).getLow(); + assert((unsigned)reg < ArrLen(regMasks)); +#ifdef TARGET_AMD64 + // shift is faster than a L1 hit on modern x86 + // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) + // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK + // and the result needs to be zero. 
+ SingleTypeRegSet result = 1ULL << reg; + assert(result == regMasks[reg]); + return result; +#else + return regMasks[reg]; +#endif } //------------------------------------------------------------------------ @@ -101,9 +114,54 @@ inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) // (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes // all the registers for that type. // -inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg, var_types type) +inline SingleTypeRegSet genSingleTypeRegMask(regNumber regNum, var_types type) { - return genRegMask(reg).getLow(); +#if defined(TARGET_ARM) + SingleTypeRegSet regMask = RBM_NONE; + + if (varTypeUsesIntReg(type)) + { + regMask = genSingleTypeRegMask(regNum); + } + else + { + assert(varTypeUsesFloatReg(type)); + regMask = genSingleTypeFloatMask(regNum, type); + } + + return regMask; +#else + return genSingleTypeRegMask(regNum); +#endif +} + +/***************************************************************************** + * + * Map a register number to a floating-point register mask. 
+ */ + +inline SingleTypeRegSet genSingleTypeFloatMask(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) +{ +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ + defined(TARGET_RISCV64) + assert(genIsValidFloatReg(reg)); + assert((unsigned)reg < ArrLen(regMasks)); + return regMasks[reg]; +#elif defined(TARGET_ARM) + assert(floatRegCanHoldType(reg, type)); + assert(reg >= REG_F0 && reg <= REG_F31); + + if (type == TYP_DOUBLE) + { + return regMasks[reg] | regMasks[reg + 1]; + } + else + { + return regMasks[reg]; + } +#else +#error Unsupported or unset target architecture +#endif } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 7e8a30bfa79c1..ee6f7b04df042 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -231,6 +231,11 @@ typedef uint64_t regMaskSmall; typedef regMaskSmall SingleTypeRegSet; +extern const regMaskSmall regMasks[REG_COUNT]; +extern inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); +extern inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg, var_types type); +extern inline SingleTypeRegSet genSingleTypeFloatMask(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); + struct regMaskTP { private: @@ -525,7 +530,7 @@ inline bool isByteReg(regNumber reg) inline regMaskTP genRegMask(regNumber reg); inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); -inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); + /***************************************************************************** * Return true if the register number is valid @@ -753,22 +758,10 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) * Map a register number to a register mask. 
*/ -extern const regMaskSmall regMasks[REG_COUNT]; - inline regMaskTP genRegMask(regNumber reg) { - assert((unsigned)reg < ArrLen(regMasks)); -#ifdef TARGET_AMD64 - // shift is faster than a L1 hit on modern x86 - // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) - // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK - // and the result needs to be zero. - regMaskTP result = 1ULL << reg; - assert(result == regMasks[reg]); - return result; -#else - return regMasks[reg]; -#endif + // TODO: Populate regMaskTP based on reg + return genSingleTypeRegMask(reg); } /***************************************************************************** @@ -778,26 +771,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) - assert(genIsValidFloatReg(reg)); - assert((unsigned)reg < ArrLen(regMasks)); - return regMasks[reg]; -#elif defined(TARGET_ARM) - assert(floatRegCanHoldType(reg, type)); - assert(reg >= REG_F0 && reg <= REG_F31); - - if (type == TYP_DOUBLE) - { - return regMasks[reg] | regMasks[reg + 1]; - } - else - { - return regMasks[reg]; - } -#else -#error Unsupported or unset target architecture -#endif + return regMaskTP(genSingleTypeFloatMask(reg ARM_ARG(type))); } //------------------------------------------------------------------------ @@ -821,23 +795,8 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D // inline regMaskTP genRegMask(regNumber regNum, var_types type) { -#if defined(TARGET_ARM) - regMaskTP regMask = RBM_NONE; - - if (varTypeUsesIntReg(type)) - { - regMask = genRegMask(regNum); - } - else - { - assert(varTypeUsesFloatReg(type)); - regMask = genRegMaskFloat(regNum, type); - } - - return regMask; -#else - return genRegMask(regNum); 
-#endif + //TODO: Populate regMaskTP based on regNum/type + return genSingleTypeRegMask(regNum ARM_ARG(type)); } /***************************************************************************** From a5e49d90f57cd94f9d58d69ed2b7fce93173546c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 17:06:13 -0700 Subject: [PATCH 11/12] fix build errors --- src/coreclr/jit/lsra.h | 92 ------------------------------------- src/coreclr/jit/target.h | 97 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 97 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a01638efb5301..4ad3bbc7f840c 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -72,98 +72,6 @@ inline bool useFloatReg(var_types type) return (regType(type) == FloatRegisterType); } -extern const regMaskSmall regMasks[REG_COUNT]; - -//------------------------------------------------------------------------ -// genSingleTypeRegMask: Given a register, generate the appropriate regMask -// -// Arguments: -// regNum - the register of interest -// -// Return Value: -// This will usually return the same value as genRegMask(regNum), except -// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. -// -inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) -{ - assert((unsigned)reg < ArrLen(regMasks)); -#ifdef TARGET_AMD64 - // shift is faster than a L1 hit on modern x86 - // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) - // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK - // and the result needs to be zero. 
- SingleTypeRegSet result = 1ULL << reg; - assert(result == regMasks[reg]); - return result; -#else - return regMasks[reg]; -#endif -} - -//------------------------------------------------------------------------ -// genSingleTypeRegMask: Given a register, generate the appropriate regMask -// -// Arguments: -// regNum - the register of interest -// type - the type of regNum (i.e. the type it is being used as) -// -// Return Value: -// This will usually return the same value as genRegMask(regNum), except -// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. -// On architectures where multiple registers are used for certain types -// (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes -// all the registers for that type. -// -inline SingleTypeRegSet genSingleTypeRegMask(regNumber regNum, var_types type) -{ -#if defined(TARGET_ARM) - SingleTypeRegSet regMask = RBM_NONE; - - if (varTypeUsesIntReg(type)) - { - regMask = genSingleTypeRegMask(regNum); - } - else - { - assert(varTypeUsesFloatReg(type)); - regMask = genSingleTypeFloatMask(regNum, type); - } - - return regMask; -#else - return genSingleTypeRegMask(regNum); -#endif -} - -/***************************************************************************** - * - * Map a register number to a floating-point register mask. 
- */ - -inline SingleTypeRegSet genSingleTypeFloatMask(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) -{ -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) - assert(genIsValidFloatReg(reg)); - assert((unsigned)reg < ArrLen(regMasks)); - return regMasks[reg]; -#elif defined(TARGET_ARM) - assert(floatRegCanHoldType(reg, type)); - assert(reg >= REG_F0 && reg <= REG_F31); - - if (type == TYP_DOUBLE) - { - return regMasks[reg] | regMasks[reg + 1]; - } - else - { - return regMasks[reg]; - } -#else -#error Unsupported or unset target architecture -#endif -} - //------------------------------------------------------------------------ // RefInfo: Captures the necessary information for a definition that is "in-flight" // during `buildIntervals` (i.e. a tree-node definition has been encountered, diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index ee6f7b04df042..9bf834fc8181f 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -231,11 +231,6 @@ typedef uint64_t regMaskSmall; typedef regMaskSmall SingleTypeRegSet; -extern const regMaskSmall regMasks[REG_COUNT]; -extern inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); -extern inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg, var_types type); -extern inline SingleTypeRegSet genSingleTypeFloatMask(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); - struct regMaskTP { private: @@ -753,6 +748,98 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) } #endif + +extern const regMaskSmall regMasks[REG_COUNT]; + +/***************************************************************************** + * + * Map a register number to a floating-point register mask. 
+ */ +inline SingleTypeRegSet genSingleTypeFloatMask(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) +{ +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ + defined(TARGET_RISCV64) + assert(genIsValidFloatReg(reg)); + assert((unsigned)reg < ArrLen(regMasks)); + return regMasks[reg]; +#elif defined(TARGET_ARM) + assert(floatRegCanHoldType(reg, type)); + assert(reg >= REG_F0 && reg <= REG_F31); + + if (type == TYP_DOUBLE) + { + return regMasks[reg] | regMasks[reg + 1]; + } + else + { + return regMasks[reg]; + } +#else +#error Unsupported or unset target architecture +#endif +} + +//------------------------------------------------------------------------ +// genSingleTypeRegMask: Given a register, generate the appropriate regMask +// +// Arguments: +// regNum - the register of interest +// +// Return Value: +// This will usually return the same value as genRegMask(regNum), except +// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. +// +inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg) +{ + assert((unsigned)reg < ArrLen(regMasks)); +#ifdef TARGET_AMD64 + // shift is faster than a L1 hit on modern x86 + // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) + // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK + // and the result needs to be zero. + SingleTypeRegSet result = 1ULL << reg; + assert(result == regMasks[reg]); + return result; +#else + return regMasks[reg]; +#endif +} + +//------------------------------------------------------------------------ +// genSingleTypeRegMask: Given a register, generate the appropriate regMask +// +// Arguments: +// regNum - the register of interest +// type - the type of regNum (i.e. 
the type it is being used as) +// +// Return Value: +// This will usually return the same value as genRegMask(regNum), except +// that it will return a 64-bits (or 32-bits) entity instead of `regMaskTP`. +// On architectures where multiple registers are used for certain types +// (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes +// all the registers for that type. +// +inline SingleTypeRegSet genSingleTypeRegMask(regNumber regNum, var_types type) +{ +#if defined(TARGET_ARM) + SingleTypeRegSet regMask = RBM_NONE; + + if (varTypeUsesIntReg(type)) + { + regMask = genSingleTypeRegMask(regNum); + } + else + { + assert(varTypeUsesFloatReg(type)); + regMask = genSingleTypeFloatMask(regNum, type); + } + + return regMask; +#else + return genSingleTypeRegMask(regNum); +#endif +} + /***************************************************************************** * * Map a register number to a register mask. From bbd5d370555cae319d3327c2283267a64c4a8275 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 May 2024 17:06:57 -0700 Subject: [PATCH 12/12] jit format --- src/coreclr/jit/target.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 9bf834fc8181f..932c4ca41cabb 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -523,9 +523,8 @@ inline bool isByteReg(regNumber reg) } #endif -inline regMaskTP genRegMask(regNumber reg); -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); - +inline regMaskTP genRegMask(regNumber reg); +inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); /***************************************************************************** * Return true if the register number is valid @@ -748,7 +747,6 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) } #endif - extern const regMaskSmall regMasks[REG_COUNT]; 
/***************************************************************************** @@ -882,7 +880,7 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D // inline regMaskTP genRegMask(regNumber regNum, var_types type) { - //TODO: Populate regMaskTP based on regNum/type + // TODO: Populate regMaskTP based on regNum/type return genSingleTypeRegMask(regNum ARM_ARG(type)); }