diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 8c966bb3403b5..34b3843d6893e 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -75,6 +75,10 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 18a6650afc37e..13a7e4629e353 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -745,7 +745,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" // // Hardware Intrinsic ISAs // +#if defined(TARGET_LOONGARCH64) +//TODO: should implement LoongArch64's features. +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") +#else RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") +#endif // defined(TARGET_LOONGARCH64) #if defined(TARGET_AMD64) || defined(TARGET_X86) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index 63a48d0e4ceea..1a7fdb37b9c25 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -399,7 +399,7 @@ enum #define CONTEXT_UNWOUND_TO_CALL 0x20000000 -typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { +typedef struct DECLSPEC_ALIGN(8) _T_CONTEXT { // // Control flags. @@ -414,8 +414,8 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { DWORD64 Ra; DWORD64 Tp; DWORD64 Sp; - DWORD64 A0;//DWORD64 V0; - DWORD64 A1;//DWORD64 V1; + DWORD64 A0; + DWORD64 A1; DWORD64 A2; DWORD64 A3; DWORD64 A4; @@ -447,7 +447,7 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // // Floating Point Registers // - //TODO: support the SIMD. + //TODO-LoongArch64: support the SIMD. DWORD64 F[32]; DWORD Fcsr; } T_CONTEXT, *PT_CONTEXT; @@ -469,7 +469,6 @@ typedef struct _T_RUNTIME_FUNCTION { }; } T_RUNTIME_FUNCTION, *PT_RUNTIME_FUNCTION; - // // Define exception dispatch context structure. // @@ -489,8 +488,6 @@ typedef struct _T_DISPATCHER_CONTEXT { PBYTE NonVolatileRegisters; } T_DISPATCHER_CONTEXT, *PT_DISPATCHER_CONTEXT; - - // // Nonvolatile context pointer record. // diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h index 2ab9c62c3e844..40fe2d1d3a2d1 100644 --- a/src/coreclr/inc/palclr.h +++ b/src/coreclr/inc/palclr.h @@ -606,4 +606,8 @@ #include "palclr_win.h" +#ifndef IMAGE_FILE_MACHINE_LOONGARCH64 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. 
+#endif + #endif // defined(HOST_WINDOWS) diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index b2d1c06a22d66..9025a8608af0f 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -41,27 +41,38 @@ class TargetArchitecture static const bool IsArm64 = false; static const bool IsArm32 = true; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_ARM64) static const bool IsX86 = false; static const bool IsX64 = false; static const bool IsArm64 = true; static const bool IsArm32 = false; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_AMD64) static const bool IsX86 = false; static const bool IsX64 = true; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; #elif defined(TARGET_X86) static const bool IsX86 = true; static const bool IsX64 = false; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; +#elif defined(TARGET_LOONGARCH64) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = true; #else #error Unknown architecture #endif }; -#endif // targetosarch_h \ No newline at end of file +#endif // targetosarch_h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 9a088b2e9a267..927bf7a238ac5 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -44,6 +44,9 @@ function(create_standalone_jit) elseif(TARGETDETAILS_ARCH STREQUAL "s390x") set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) + elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -233,6 +236,15 @@ set( JIT_S390X_SOURCES # Not supported as JIT target ) +set( JIT_LOONGARCH64_SOURCES + codegenloongarch64.cpp + emitloongarch64.cpp + lowerloongarch64.cpp + lsraloongarch64.cpp + targetloongarch64.cpp + unwindloongarch64.cpp +) + # We include the headers here for better experience in IDEs. 
set( JIT_HEADERS ../inc/corinfo.h @@ -379,6 +391,13 @@ set ( JIT_S390X_HEADERS # Not supported as JIT target ) +set( JIT_LOONGARCH64_HEADERS + emitloongarch64.h + emitfmtsloongarch64.h + instrsloongarch64.h + registerloongarch64.h +) + convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) convert_to_absolute_path(JIT_HEADERS ${JIT_HEADERS}) convert_to_absolute_path(JIT_RESOURCES ${JIT_RESOURCES}) @@ -397,6 +416,8 @@ convert_to_absolute_path(JIT_ARMV6_SOURCES ${JIT_ARMV6_SOURCES}) convert_to_absolute_path(JIT_ARMV6_HEADERS ${JIT_ARMV6_HEADERS}) convert_to_absolute_path(JIT_S390X_SOURCES ${JIT_S390X_SOURCES}) convert_to_absolute_path(JIT_S390X_HEADERS ${JIT_S390X_HEADERS}) +convert_to_absolute_path(JIT_LOONGARCH64_SOURCES ${JIT_LOONGARCH64_SOURCES}) +convert_to_absolute_path(JIT_LOONGARCH64_HEADERS ${JIT_LOONGARCH64_HEADERS}) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES}) @@ -416,6 +437,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) elseif(CLR_CMAKE_TARGET_ARCH_S390X) set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) +elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -558,6 +582,10 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) create_standalone_jit(TARGET clrjit_win_x86_${ARCH_HOST_NAME} OS win ARCH x86 DESTINATIONS .) 
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 0c5ae4c0fffa4..f1c1b49b2578b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -235,7 +235,16 @@ class CodeGen final : public CodeGenInterface void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk = nullptr); +#ifdef TARGET_LOONGARCH64 + void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, + instruction ins, + regNumber reg1, + BasicBlock* failBlk = nullptr, + regNumber reg2 = REG_R0); +#else void genCheckOverflow(GenTree* tree); +#endif //------------------------------------------------------------------------- // @@ -251,7 +260,11 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); +#if defined(TARGET_LOONGARCH64) + void genFnPrologCalleeRegArgs(); +#else void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); +#endif void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); @@ -263,7 +276,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) bool genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, @@ -323,6 +336,7 @@ class CodeGen final : public CodeGenInterface void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + #else void genPushCalleeSavedRegisters(); #endif @@ -408,7 +422,25 @@ class CodeGen final : public CodeGenInterface FuncletFrameInfoDsc genFuncletInfo; -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + + // A set of information that is used by funclet prolog and epilog generation. + // It is collected once, before funclet prologs and epilogs are generated, + // and used by all funclet prologs and epilogs, which must all be the same. + struct FuncletFrameInfoDsc + { + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function + // (negative) + int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive) + int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) + int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) + int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. 
+ int fiSpDelta1; // Stack pointer delta 1 (negative) + }; + + FuncletFrameInfoDsc genFuncletInfo; +#endif // TARGET_LOONGARCH64 #if defined(TARGET_XARCH) @@ -598,6 +630,10 @@ class CodeGen final : public CodeGenInterface void genArm64EmitterUnitTests(); #endif +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + void genLoongArch64EmitterUnitTests(); +#endif + #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) void genAmd64EmitterUnitTests(); #endif @@ -1234,8 +1270,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForStoreLclFld(GenTreeLclFld* tree); void genCodeForStoreLclVar(GenTreeLclVar* tree); void genCodeForReturnTrap(GenTreeOp* tree); - void genCodeForJcc(GenTreeCC* tree); - void genCodeForSetcc(GenTreeCC* setcc); void genCodeForStoreInd(GenTreeStoreInd* tree); void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeObj* cpObjNode); @@ -1324,7 +1358,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genTableBasedSwitch(GenTree* tree); void genCodeForArrIndex(GenTreeArrIndex* treeNode); void genCodeForArrOffset(GenTreeArrOffs* treeNode); +#if defined(TARGET_LOONGARCH64) + instruction genGetInsForOper(GenTree* treeNode); +#else instruction genGetInsForOper(genTreeOps oper, var_types type); +#endif bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd); regNumber getCallIndirectionCellReg(const GenTreeCall* call); @@ -1333,7 +1371,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); -#ifdef TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) + // TODO: refactor for LA. + void genCodeForJumpCompare(GenTreeOp* tree); +#endif +#if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); @@ -1349,6 +1391,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode); void genMultiRegStoreToLocal(GenTreeLclVar* lclNode); +#if defined(TARGET_LOONGARCH64) + void genMultiRegCallStoreToLocal(GenTree* treeNode); +#endif + // Codegen for multi-register struct returns. bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD @@ -1364,9 +1410,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 void genReturn(GenTree* treeNode); @@ -1656,6 +1702,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH +#ifndef TARGET_LOONGARCH64 // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions // such as X86's SETcc. A sequence of instructions rather than just a single one is required for // certain floating point conditions. 
@@ -1699,6 +1746,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); + + void genCodeForJcc(GenTreeCC* tree); + void genCodeForSetcc(GenTreeCC* setcc); +#endif // !TARGET_LOONGARCH64 }; // A simple phase that just invokes a method on the codegen instance diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 61bd8639262e1..0eccb2abfc8e5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -127,9 +127,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) /* Assume that we not fully interruptible */ SetInterruptible(false); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) SetHasTailCalls(false); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef DEBUG genInterruptibleUsed = false; genCurDispOffset = (unsigned)-1; @@ -596,7 +596,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_ASSIGN_BYREF: #if defined(TARGET_AMD64) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; #elif defined(TARGET_X86) return RBM_ESI | RBM_EDI | RBM_ECX; @@ -605,7 +605,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH; #endif -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: return RBM_CALLEE_TRASH_WRITEBARRIER; @@ -1171,7 +1171,7 @@ bool CodeGen::genCreateAddrMode( cns += op2->AsIntConCommon()->IconValue(); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (cns == 0) #endif { @@ -1191,7 +1191,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) @@ -1214,7 +1214,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) +#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) default: break; @@ -1235,7 +1235,7 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_ADD: @@ -1294,7 +1294,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 case GT_NOP: @@ -1313,7 +1313,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. 
@@ -1370,7 +1370,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 case GT_NOP: @@ -1593,7 +1593,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi else { // The code to throw the exception will be generated inline, and - // we will jump around it in the normal non-exception case. + // we will jump around it in the normal non-exception case. BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); @@ -1620,6 +1620,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi * have set the flags. Check if the operation caused an overflow. */ +#ifndef TARGET_LOONGARCH64 // inline void CodeGen::genCheckOverflow(GenTree* tree) { @@ -1666,6 +1667,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); } +#endif #if defined(FEATURE_EH_FUNCLETS) @@ -1815,6 +1817,10 @@ void CodeGen::genGenerateMachineCode() { printf("generic ARM64 CPU"); } + else if (compiler->info.genCPU == CPU_LOONGARCH64) + { + printf("generic LOONGARCH64 CPU"); + } else { printf("unknown architecture"); @@ -2019,7 +2025,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -2938,6 +2944,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif + +#ifndef TARGET_LOONGARCH64 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4228,6 +4236,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } +#endif // !TARGET_LOONGARCH64 + #ifdef _PREFAST_ #pragma warning(pop) #endif @@ -4252,6 +4262,11 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; +#ifdef TARGET_LOONGARCH64 + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; +#endif + for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) { /* Is this variable a parameter? */ @@ -4298,8 +4313,38 @@ void CodeGen::genEnregisterIncomingStackArgs() assert(regNum != REG_STK); var_types regType = varDsc->GetStackSlotHomeType(); +#ifdef TARGET_LOONGARCH64 + { + bool FPbased; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); + } + else + { + int baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset); + } + } + } +#else // !TARGET_LOONGARCH64 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); +#endif // !TARGET_LOONGARCH64 + regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); @@ -4600,6 +4645,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + // We will just zero out the entire vector register. This sets it to a double/float zero value + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4635,6 +4683,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4650,6 +4700,8 @@ regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) { #ifdef TARGET_ARM64 return REG_ZR; +#elif defined(TARGET_LOONGARCH64) + return REG_R0; #else // !TARGET_ARM64 if (*pInitRegZeroed == false) { @@ -5057,11 +5109,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed // ARM's emitIns_R_R_I automatically uses the reserved register if necessary. GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#else // !ARM64 !ARM +#elif defined(TARGET_LOONGARCH64) + genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), + compiler->lvaCachedGenericContextArgOffset(), REG_R21); +#else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#endif // !ARM64 !ARM +#endif // !ARM64 !ARM !LOONGARCH64 } /***************************************************************************** @@ -5444,6 +5499,23 @@ void CodeGen::genFinalizeFrame() maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) +#ifdef TARGET_LOONGARCH64 + if (isFramePointerUsed()) + { + // For a FP based frame we have to push/pop the FP register + // + maskCalleeRegsPushed |= RBM_FPBASE; + + // This assert check that we are not using REG_FP + // as both the frame pointer and as a codegen register + // + assert(!regSet.rsRegsModified(RBM_FPBASE)); + } + + // we always push RA. 
See genPushCalleeSavedRegisters + maskCalleeRegsPushed |= RBM_RA; +#endif // TARGET_LOONGARCH64 + compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG @@ -5566,10 +5638,10 @@ void CodeGen::genFnProlog() instGen(INS_nop); instGen(INS_BREAKPOINT); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Avoid asserts in the unwind info because these instructions aren't accounted for. compiler->unwindPadding(); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 } #endif // DEBUG @@ -5975,14 +6047,16 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 + +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + if (!isOSRx64Root) { genPushCalleeSavedRegisters(); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -6013,7 +6087,7 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- CLANG_FORMAT_COMMENT_ANCHOR; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) regMaskTP maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM @@ -6026,7 +6100,7 @@ void CodeGen::genFnProlog() genAllocLclFrame(compiler->compLclFrameSize + extraFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 #ifdef TARGET_AMD64 // For x64 OSR we have to finish saving int callee saves. @@ -6201,6 +6275,13 @@ void CodeGen::genFnProlog() { compiler->lvaUpdateArgsWithInitialReg(); +#if defined(TARGET_LOONGARCH64) + if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) + { + initRegZeroed = false; + genFnPrologCalleeRegArgs(); + } +#else auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) { if (regState->rsCalleeRegArgMaskLiveIn) { @@ -6237,6 +6318,8 @@ void CodeGen::genFnProlog() assignIncomingRegisterArgs(&intRegState); #endif +#endif // TARGET_LOONGARCH64 + // Home the incoming arguments. genEnregisterIncomingStackArgs(); } @@ -6564,7 +6647,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -6672,7 +6755,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass) // unsigned CodeGen::getFirstArgWithStackSlot() { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned baseVarNum = 0; // Iterate over all the lvParam variables in the Lcl var table until we find the first one // that's passed on the stack. @@ -7834,9 +7917,9 @@ void CodeGen::genReturn(GenTree* treeNode) // exit point where it is actually dead. 
genConsumeReg(op1); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genSimpleReturn(treeNode); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 #if defined(TARGET_X86) if (varTypeUsesFloatReg(treeNode)) { @@ -7864,7 +7947,7 @@ void CodeGen::genReturn(GenTree* treeNode) regNumber retReg = varTypeUsesFloatReg(treeNode) ? REG_FLOATRET : REG_INTRET; inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 } } @@ -8058,6 +8141,22 @@ void CodeGen::genStructReturn(GenTree* treeNode) GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); +#ifdef TARGET_LOONGARCH64 + // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }", + // i. e. not include the padding for the first field, and so the general loop below won't work. + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); + if (regCount > 1) + { + assert(regCount == 2); + int offset = genTypeSize(type); + type = retTypeDesc.GetReturnRegType(1); + offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset); + toReg = retTypeDesc.GetABIReturnReg(1); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); + } +#else // !TARGET_LOONGARCH64 int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -8066,6 +8165,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } +#endif // !TARGET_LOONGARCH64 } else { diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f276a492da33d..dbd53ffbad46f 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -112,7 +112,7 @@ class CodeGenInterface private: #if defined(TARGET_XARCH) static const insFlags instInfo[INS_count]; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture @@ -360,7 +360,7 @@ class CodeGenInterface m_cgInterruptible = value; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool GetHasTailCalls() { @@ -374,9 +374,9 @@ class CodeGenInterface private: bool m_cgInterruptible; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // The following will be set to true if we've determined that we need to // generate a full-blown pointer register map for the current method. diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 9c09d423e85ef..83efe5685c3ba 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1211,7 +1211,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(spillType != TYP_UNDEF); // TODO-Cleanup: The following code could probably be further merged and cleaned up. 
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Load local variable from its home location. // Never allow truncating the locals here, otherwise a subsequent // use of the local with a wider type would see the truncated @@ -1223,6 +1223,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) { spillType = lclLoadType; } + +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum())) + { + spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG; + } +#endif #elif defined(TARGET_ARM) // No normalizing for ARM #else @@ -2518,7 +2525,13 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_checkKind = CHECK_NONE; } - m_extendKind = COPY; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need + // the upper 32bits be sign-extended to 64 bits. + m_extendKind = SIGN_EXTEND_INT; +#else + m_extendKind = COPY; +#endif m_extendSrcSize = 4; } #endif @@ -2595,6 +2608,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) } #endif // !defined(TARGET_64BIT) +#ifndef TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJumpTrue: Generate code for a GT_JTRUE node. // @@ -2697,3 +2711,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum()); genProduceReg(setcc); } +#endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp new file mode 100644 index 0000000000000..afe5b0b95d5bd --- /dev/null +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -0,0 +1,9370 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX LOONGARCH64 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +//------------------------------------------------------------------------ +// genInstrWithConstant: we will typically generate one instruction +// +// ins reg1, reg2, imm +// +// However the imm might not fit as a directly encodable immediate, +// when it doesn't fit we generate extra instruction(s) that sets up +// the 'tmpReg' with the proper immediate value. +// +// li tmpReg, imm // li is pseudo instruction here which maybe 2-4 instructions. 
+// ins reg1, reg2, tmpReg +// +// Arguments: +// ins - instruction +// attr - operation size and GC attribute +// reg1, reg2 - first and second register operands +// imm - immediate value (third operand when it fits) +// tmpReg - temp register to use when the 'imm' doesn't fit. Can be REG_NA +// if caller knows for certain the constant will fit. +// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes. +// Default: false. +// +// Return Value: +// returns true if the immediate was small enough to be encoded inside instruction. If not, +// returns false meaning the immediate was too large and tmpReg was used and modified. +// +bool CodeGen::genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion /* = false */) +{ + emitAttr size = EA_SIZE(attr); + + // reg1 is usually a dest register + // reg2 is always source register + assert(tmpReg != reg2); // tmpReg can not match any source register + +#ifdef DEBUG + switch (ins) + { + case INS_addi_d: + + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + case INS_st_d: + case INS_fst_d: + + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_fld_s: + case INS_ld_d: + case INS_fld_d: + break; + + default: + assert(!"Unexpected instruction in genInstrWithConstant"); + break; + } +#endif + bool immFitsInIns = emitter::isValidSimm12(imm); + + if (immFitsInIns) + { + // generate a single instruction that encodes the immediate directly + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit + assert(tmpReg != REG_NA); + + // generate two or more instructions + + // first we load the immediate into tmpReg + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, tmpReg, imm); + regSet.verifyRegUsed(tmpReg); + + // when we are in an unwind code region + // we record the extra instructions using unwindPadding() + if (inUnwindRegion) + { + compiler->unwindPadding(); + } + + if (ins == INS_addi_d) + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, reg1, reg2, tmpReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, tmpReg, reg2, tmpReg); + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, tmpReg, 0); + } + } + return immFitsInIns; +} + +//------------------------------------------------------------------------ +// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog +// or the epilog. The unwind codes for the generated instructions are produced. An available temporary +// register is required to be specified, in case the constant is too large to encode in an "daddu" +// instruction (or "dsubu" instruction if we choose to use one), such that we need to load the constant +// into a register first, before using it. +// +// Arguments: +// spDelta - the value to add to SP (can be negative) +// tmpReg - an available temporary register +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. +// +// Return Value: +// None. 
+ +void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +{ + // Even though INS_addi_d is specified here, the encoder will choose either + // an INS_add_d or an INS_addi_d and encode the immediate as a positive value + // + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) + { + if (pTmpRegIsZero != nullptr) + { + *pTmpRegIsZero = false; + } + } + + if (reportUnwindData) + { + // spDelta is negative in the prolog, positive in the epilog, + // but we always tell the unwind codes the positive value. + ssize_t spDeltaAbs = abs(spDelta); + unsigned unwindSpDelta = (unsigned)spDeltaAbs; + assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned + + compiler->unwindAllocStack(unwindSpDelta); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet +// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. +// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to save. +// reg2 - Second register of pair to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fst_d; + } + + if (spDelta != 0) + { + // generate addi.d SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + + assert((spDelta + spOffset + 16) <= 0); + + assert(spOffset <= 2031); // 2047-16 + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); +} + +//------------------------------------------------------------------------ +// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or +// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), +// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not +// below it, which we currently don't support. 
This restriction could be loosened if the callers change to handle it +// (and this function changes to support using pre-indexed SD addressing). The caller must ensure that we can use the +// SD instruction, and that spOffset will be in the legal range for that instruction. +// +// Arguments: +// reg1 - Register to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fst_d; + } + + if (spDelta != 0) + { + // generate daddiu SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); +} + +//------------------------------------------------------------------------ +// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. +// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to restore. +// reg2 - Second register of pair to restore. +// spOffset - The offset from SP to load reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld_d; + } + + if (spDelta != 0) + { + assert(!useSaveNextPair); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. +// +// Arguments: +// reg1 - Register to restore. +// spOffset - The offset from SP to restore reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld_d; + } + + if (spDelta != 0) + { + // ld reg1, offset(SP) + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate add SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask. +// The first register pair will contain the lowest register. Register pairs will combine neighbor +// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with +// odd number of regs) then the second element of that RegPair will be REG_NA. +// +// Arguments: +// regsMask - a mask of registers for prolog/epilog generation; +// regStack - a regStack instance to build the stack in, used to save temp copyings. +// +// Return value: +// no return value; the regStack argument is modified. 
+//
+// static
+void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
+{
+    assert(regStack != nullptr);
+    assert(regStack->Height() == 0);
+
+    unsigned regsCount = genCountBits(regsMask);
+
+    while (regsMask != RBM_NONE)
+    {
+        regMaskTP reg1Mask = genFindLowestBit(regsMask);
+        regNumber reg1     = genRegNumFromMask(reg1Mask);
+        regsMask &= ~reg1Mask;
+        regsCount -= 1;
+
+        bool isPairSave = false;
+        if (regsCount > 0)
+        {
+            regMaskTP reg2Mask = genFindLowestBit(regsMask);
+            regNumber reg2     = genRegNumFromMask(reg2Mask);
+            if (reg2 == REG_NEXT(reg1))
+            {
+                // The JIT doesn't allow saving pair (S7,FP), even though the
+                // save_regp register pair unwind code specification allows it.
+                // The JIT always saves (FP,RA) as a pair, and uses the save_fpra
+                // unwind code. This only comes up in stress mode scenarios
+                // where callee-saved registers are not allocated completely
+                // from lowest-to-highest, without gaps.
+                if (reg1 != REG_FP)
+                {
+                    // Both registers must have the same type to be saved as pair.
+                    if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+                    {
+                        isPairSave = true;
+
+                        regsMask &= ~reg2Mask;
+                        regsCount -= 1;
+
+                        regStack->Push(RegPair(reg1, reg2));
+                    }
+                }
+            }
+        }
+
+        if (!isPairSave)
+        {
+            regStack->Push(RegPair(reg1));
+        }
+    }
+    assert(regsCount == 0 && regsMask == RBM_NONE);
+
+    genSetUseSaveNextPairs(regStack);
+}
+
+//------------------------------------------------------------------------
+// genSetUseSaveNextPairs: Set useSaveNextPair for each RegPair on the stack which unwind info can be encoded as
+// save_next code.
+//
+// Arguments:
+//   regStack - a regStack instance to set useSaveNextPair.
+//
+// Notes:
+// We can use save_next for RegPair(N, N+1) only when we have sequence like (N-2, N-1), (N, N+1).
+// In this case in the prolog save_next for (N, N+1) refers to save_pair(N-2, N-1);
+// in the epilog the unwinder will search for the first save_pair (N-2, N-1)
+// and then go back to the first save_next (N, N+1) to restore it first.
+//
+// static
+void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
+{
+    for (int i = 1; i < regStack->Height(); ++i)
+    {
+        RegPair& curr = regStack->BottomRef(i);
+        RegPair  prev = regStack->Bottom(i - 1);
+
+        if (prev.reg2 == REG_NA || curr.reg2 == REG_NA)
+        {
+            continue;
+        }
+
+        if (REG_NEXT(prev.reg2) != curr.reg1)
+        {
+            continue;
+        }
+
+        if (genIsValidFloatReg(prev.reg2) != genIsValidFloatReg(curr.reg1))
+        {
+            // It is possible to support changing of the last int pair with the first float pair,
+            // but it is very rare case and it would require superfluous changes in the unwinder.
+            continue;
+        }
+        curr.useSaveNextPair = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation.
+//
+// Return value:
+//   stack slot size in bytes.
+//
+// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
+//
+// static
+int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
+{
+    assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else.
+
+    static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+    return REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog generation;
+//   spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        RegPair regPair = regStack.Bottom(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            // We can use two SD instructions.
+            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21,
+                                 nullptr);
+
+            spOffset += 2 * slotSize;
+        }
+        else
+        {
+            // No register pair; we use an SD instruction.
+            genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr);
+            spOffset += slotSize;
+        }
+
+        spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+    }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (SD) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8 and the stack pointer
+// adjustment must be done by a SUB, and not folded into a pre-indexed store.
+//
+// Arguments:
+//   regsToSaveMask          - The mask of callee-saved registers to save. If empty, this function does nothing.
+//   lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+//                             if non-zero spDelta, then this is the offset of the first save *after* that
+//                             SP adjustment.
+//   spDelta                 - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                             zero).
+//
+// Notes:
+//   The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers.
+//
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+    assert(spDelta <= 0);
+
+    unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+    if (regsToSaveCount == 0)
+    {
+        if (spDelta != 0)
+        {
+            // Currently this is the case for varargs only
+            // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+            genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true);
+        }
+        return;
+    }
+
+    assert((spDelta % 16) == 0);
+
+    assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED));
+
+    // Save integer registers at higher addresses than floating-point registers.
+
+    regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+    regMaskTP maskSaveRegsInt   = regsToSaveMask & ~maskSaveRegsFloat;
+
+    if (maskSaveRegsFloat != RBM_NONE)
+    {
+        genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset);
+        spDelta = 0;
+        lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES;
+    }
+
+    if (maskSaveRegsInt != RBM_NONE)
+    {
+        genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
+        // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
+    }
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegisterGroup: Restores the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for epilog generation;
+//   spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    int stackDelta = 0;
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        bool lastRestoreInTheGroup = (i == regStack.Height() - 1);
+        bool updateStackDelta      = lastRestoreInTheGroup && (spDelta != 0);
+        if (updateStackDelta)
+        {
+            // Update stack delta only if it is the last restore (the first save).
+            assert(stackDelta == 0);
+            stackDelta = spDelta;
+        }
+
+        RegPair regPair = regStack.Top(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            spOffset -= 2 * slotSize;
+
+            genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_R21,
+                                    nullptr);
+        }
+        else
+        {
+            spOffset -= slotSize;
+            genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_R21, nullptr);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+//   regsToRestoreMask       - The mask of callee-saved registers to restore. If empty, this function does nothing.
+//   lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+//   spDelta                 - If non-zero, the amount to add to SP after the register restores (must be positive or
+//                             zero).
+// +// Here's an example restore sequence: +// ld s7, 88(sp) +// ld s6, 80(sp) +// ld s5, 72(sp) +// ld s4, 64(sp) +// ld s3, 56(sp) +// ld s2, 48(sp) +// ld s1, 40(sp) +// ld s0, 32(sp) +// +// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and +// the last restore adjusts SP by the specified amount. For example: +// ld s7, 56(sp) +// ld s6, 48(sp) +// ld s5, 40(sp) +// ld s4, 32(sp) +// ld s3, 24(sp) +// ld s2, 16(sp) +// ld s1, 88(sp) +// ld s0, 80(sp) +// +// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when +// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. +// +// Return Value: +// None. + +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta >= 0); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + if (regsToRestoreCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + return; + } + + assert((spDelta % 16) == 0); + + // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + + // Point past the end, to start. We predecrement to find the offset to load from. + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; + + // Restore in the opposite order of saving. + + if (maskRestoreRegsInt != RBM_NONE) + { + int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; + } + + if (maskRestoreRegsFloat != RBM_NONE) + { + // If there is any spDelta, it must be used here. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + // No need to update spOffset since it's not used after this. + } +} + +// clang-format off +/***************************************************************************** + * + * Generates code for an EH funclet prolog. + * + * Funclets have the following incoming arguments: + * + * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) + * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * finally/fault: none + * + * Funclets set the following registers on exit: + * + * catch: v0 = the address at which execution should resume (see BBJ_EHCATCHRET) + * filter: v0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) + * finally/fault: none + * + * The LOONGARCH64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, + * including everything; #outsz is outgoing argument space. 
#framesz must be a multiple of 16): + * + * Frame type 1: + * For #framesz <= 32760 and FP/RA at bottom: + * daddiu sp,sp,-#framesz ; establish the frame (predecrement by #framesz), save FP/RA + * sd fp,#outsz(sp) + * sd ra,#outsz+8(sp) + * sd s0,#xxx-8(sp) ; save callee-saved registers, as necessary + * sd s1,#xxx(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 2: + * For #framesz <= 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz ; establish the frame + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 3: + * For #framesz > 32760 and FP/RA at bottom: + * ; for funclet, #framesz-#outsz will be less than 32760. + * + * daddiu sp,sp,-(#framesz-#FPRA_delta) ; note maybe 16byte-alignment. + * sd fp, pad(sp) ; pad is depended on stack-16byte-alignment.. + * sd ra, pad+8(sp) + * sd s0,#xxx(sp) ; save callee-saved registers, as necessary, + * sd s1,#xxx+8(sp) + * daddiu sp,sp,-#outsz ; create space for outgoing argument space, mabye 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 4: + * For #framesz > 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz+PSP_offset ; establish the frame, maybe 16byte-alignment. 
+ * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * daddiu sp,sp,-#PSP_offset ; establish the frame, maybe 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * + * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, + * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack + * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 224 bytes: + * + * FP,RA registers + * 8 int callee-saved register s0-s7 + * 8 float callee-saved registers f24-f31 + * 8 saved integer argument registers a0-a7, if varargs function + * 1 PSP slot + * 1 alignment slot, future maybe add gp + * == 28 slots * 8 bytes = 224 bytes. + * + * The outgoing argument size, however, can be very large, if we call a function that takes a large number of + * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main + * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of + * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before + * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset + * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument + * space. Both changes to SP might need to add alignment padding. + * + * In addition to the above "standard" frames, we also need to support a frame where the saved FP/RA are at the + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/RA + * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/RA + * changed, but where the alignment padding is placed has also changed. + * + * + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP + * as in the main function. + * + * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we + * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * + * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. 
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. + * + * if (this is a filter funclet) + * { + * // a1 on entry to a filter funclet is CallerSP of the containing function: + * // either the main function, or the funclet for a handler that this filter is dynamically nested within. + * // Note that a filter can be dynamically nested within a funclet even if it is not statically within + * // a funclet. Consider: + * // + * // try { + * // try { + * // throw new Exception(); + * // } catch(Exception) { + * // throw new Exception(); // The exception thrown here ... + * // } + * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack + * // } filter-handler { + * // } + * // + * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will + * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always + * // create a main function PSP for any function with a filter. + * + * ld a1, CallerSP_to_PSP_slot_delta(a1) ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) + * sd a1, SP_to_PSP_slot_delta(sp) ; store the PSP + * daddiu fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer + * } + * else + * { + * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. + * // TODO-LOONGARCH64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. + * + * daddiu a3, fp, Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? + * sd a3, SP_to_PSP_slot_delta(sp) ; store the PSP + * } + * + * An example epilog sequence is then: + * + * daddiu sp,sp,#outsz ; if any outgoing argument space + * ... ; restore callee-saved registers + * ld s0,#xxx-8(sp) + * ld s1,#xxx(sp) + * ld fp,#framesz-8(sp) + * ld ra,#framesz(sp) + * daddiu sp,sp,#framesz + * jr ra + * + */ +// clang-format on + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletProlog()\n"); +#endif + + assert(block != NULL); + assert(block->bbFlags & BBF_FUNCLET_BEG); + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + gcInfo.gcResetForBB(); + + compiler->unwindBegProlog(); + + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
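+ // (Asserted below.) In both frame types handled here, FP and RA are stored explicitly
+ // relative to SP and then removed from the save mask before the remaining callee-saved
+ // registers are saved via genSaveCalleeSavedRegistersHelp.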
+ assert((maskSaveRegsInt & RBM_RA) != 0); + assert((maskSaveRegsInt & RBM_FP) != 0); + + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; + + regMaskTP maskArgRegsLiveIn; + if (isFilter) + { + maskArgRegsLiveIn = RBM_A0 | RBM_A1; + } + else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) + { + maskArgRegsLiveIn = RBM_NONE; + } + else + { + maskArgRegsLiveIn = RBM_A0; + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } +#endif + + int offset = 0; + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= -2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + offset = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } + + // This is the end of the OS-reported prolog for purposes of unwinding + compiler->unwindEndProlog(); + + // If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet frame. 
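+ // For a filter funclet the sequence below is, in effect (offsets are symbolic):
+ //   ld.d   a1, a1, CallerSP_to_PSP_slot_delta     ; a1 = CallerSP of the main function
+ //   st.d   a1, sp, SP_to_PSP_slot_delta           ; publish it as this funclet's PSP slot
+ //   addi.d fp, a1, Function_CallerSP_to_FP_delta  ; re-establish the frame pointer
+ // For a non-filter funclet, CallerSP is recomputed from the already-established FP instead.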
+ if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (isFilter) + { + // This is the first block of a filter + // Note that register a1 = CallerSP of the containing function + // A1 is overwritten by the first Load (new callerSP) + // A2 is scratch when we have a large constant offset + + // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or + // function) + genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, + REG_A2, false); + regSet.verifyRegUsed(REG_A1); + + // Store the PSP value (aka CallerSP) + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + + // re-establish the frame pointer + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, + genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + } + else // This is a non-filter funclet + { + // A3 is scratch, A2 can also become scratch. + + // compute the CallerSP, given the frame pointer. a3 is scratch? + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE, + -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + regSet.verifyRegUsed(REG_A3); + + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + } + } +} + +/***************************************************************************** + * + * Generates code for an EH funclet epilog. + */ + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFuncletEpilog()\n"); + } +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + bool unwindStarted = false; + int frameSize = genFuncletInfo.fiSpDelta1; + + if (!unwindStarted) + { + // We can delay this until we know we'll generate an unwindable instruction, if necessary. + compiler->unwindBegEpilog(); + unwindStarted = true; + } + + regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
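+ // The epilog below reverses the prolog: the helper restores the other callee-saved
+ // registers, RA and FP are then reloaded explicitly, and the SP adjustment(s) undo the
+ // frame establishment (split into two steps for frame type 2, matching its prolog).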
+ assert((maskRestoreRegsInt & RBM_RA) != 0); + assert((maskRestoreRegsInt & RBM_FP) != 0); + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } +#endif + + regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; + + assert(frameSize < 0); + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + // first, generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + assert(offset2 < 2040); // can amend. + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + // second, generate daddiu SP,SP,imm for remaine space. + genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } + GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + + compiler->unwindEndEpilog(); +} + +/***************************************************************************** + * + * Capture the information used to generate the funclet prologs and epilogs. + * Note that all funclet prologs are identical, and all funclet epilogs are + * identical (per type: filters are identical, and non-filters are identical). + * Thus, we compute the data used for these just once. + * + * See genFuncletProlog() for more information about the prolog/epilog sequences. + */ + +void CodeGen::genCaptureFuncletPrologEpilogInfo() +{ + if (!compiler->ehAnyFunclets()) + { + return; + } + + assert(isFramePointerUsed()); + + // The frame size and offsets must be finalized + assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); + + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); + + regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; + assert((rsMaskSaveRegs & RBM_RA) != 0); + assert((rsMaskSaveRegs & RBM_FP) != 0); + + unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 
8 : 0; + + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); + + unsigned saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; + + unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + + assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + + unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); + + int SP_to_FPRA_save_delta = compiler->lvaOutgoingArgSpaceSize; + + unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); + + unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; + assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); + + if (maxFuncletFrameSizeAligned <= (2048 - 8)) + { + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + else + { + unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; + assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + + genFuncletInfo.fiFrameType = 2; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + + int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + + /* Now save it for future use */ + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; + + genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + +#ifdef DEBUG + if (verbose) + { + printf("\n"); + printf("Funclet prolog / epilog info\n"); + printf(" Save regs: "); + dspRegMask(genFuncletInfo.fiSaveRegs); + printf("\n"); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta); + printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); + printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } + } + } + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0); +#endif // DEBUG +} + +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnEpilog()\n"); + } +#endif // DEBUG + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + { + printf("\n__epilog:\n"); + } + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", 
VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && (lastNode->gtOper == GT_JMP)) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. + noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + // TODO-LOONGARCH64-CQ: using B/BL for optimization. + case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. + callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. 
+ */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + genCallInstruction(jmpNode->AsCall()); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + +void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (compiler->lvaPSPSym == BAD_VAR_NUM) + { + return; + } + + noway_assert(isFramePointerUsed()); // We need an explicit frame pointer + + int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); + + // We will just use the initReg since it is an available register + // and we are probably done using it anyway... + regNumber regTmp = initReg; + *pInitRegZeroed = false; + + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, REG_R21, false); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); +} + +//----------------------------------------------------------------------------- +// genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case +// `genUseBlockInit` is set. +// +// Arguments: +// untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init +// code will end initializing memory (not inclusive). +// untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will +// start zero initializing memory. +// initReg - A scratch register (that gets set to zero on some platforms). +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, +// 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. +// +void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) +{ + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskTP regMask; + + regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + // see: src/jit/registerloongarch64.h + availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are + // currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for + // a large constant. 
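+ // rAddr is set to frame-pointer + untrLclLo below: a single addi.d when the offset fits
+ // in a signed 12-bit immediate, otherwise the offset is materialized into initReg and
+ // added with add.d.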
+ + rAddr = initReg; + *pInitRegZeroed = false; + + // rAddr is not a live incoming argument reg + assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); + assert(untrLclLo % 4 == 0); + + if (emitter::isValidSimm12(untrLclLo)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); + } + else + { + // Load immediate into the InitReg register + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); + *pInitRegZeroed = false; + } + + bool useLoop = false; + unsigned uCntBytes = untrLclHi - untrLclLo; + assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. + unsigned int padding = untrLclLo & 0x7; + + if (padding) + { + assert(padding == 4); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, 0); + uCntBytes -= 4; + } + + unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + + // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. + // When it is 10 or greater, we will emit a loop containing a sd instruction. + // In both of these cases the sd instruction will write two zeros to memory + // and we will use a single str instruction at the end whenever we have an odd count. + if (uCntSlots >= 10) + useLoop = true; + + if (useLoop) + { + // We pick the next lowest register number for rCnt + noway_assert(availMask != RBM_NONE); + regMask = genFindLowestBit(availMask); + rCnt = genRegNumFromMask(regMask); + availMask &= ~regMask; + + noway_assert(uCntSlots >= 2); + assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + // argument reg + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + // TODO-LOONGARCH64: maybe optimize further + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); + + // bne rCnt, zero, -4 * 4 + ssize_t imm = -16; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + + uCntBytes %= REGSIZE_BYTES * 2; + } + else + { + while (uCntBytes >= REGSIZE_BYTES * 2) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + uCntBytes -= REGSIZE_BYTES * 2; + padding = 0; + } + } + + if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + { + if ((uCntBytes - REGSIZE_BYTES) == 0) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + } + uCntBytes -= REGSIZE_BYTES; + } + if (uCntBytes > 0) + { + assert(uCntBytes == sizeof(int)); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, padding); + uCntBytes -= sizeof(int); + } + noway_assert(uCntBytes == 0); +} + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX End Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ + // Generate a call to the finally, like this: + // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used + // bl finally-funclet + // b finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); + } + GetEmitter()->emitIns_J(INS_bl, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. + + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_break); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-LOONGARCH64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + GetEmitter()->emitEnableGC(); + } + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); +} + +// move an immediate value into an integer register +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + emitter* emit = GetEmitter(); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs. 
+ } + + if (EA_IS_RELOC(size)) + { + assert(genIsValidIntReg(reg)); + emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC + } + else + { + emit->emitIns_I_la(size, reg, imm); + } + + regSet.verifyRegUsed(reg); +} + +/*********************************************************************************** + * + * Generate code to set a register 'targetReg' of type 'targetType' to the constant + * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call + * genProduceReg() on the target register. + */ +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t cnsVal = con->IconValue(); + + // if (con->ImmedValNeedsReloc(compiler)) + if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) + { + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + assert(compiler->opts.compReloc); + GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + regSet.verifyRegUsed(targetReg); + } + else + { + genSetRegToIcon(targetReg, cnsVal, targetType); + } + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitActualTypeSize(tree); + double constValue = tree->AsDblCon()->gtDconVal; + + // Make sure we use "daddiu reg, zero, 0x00" only for positive zero (0.0) + // and not for negative zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0.0 + // We will just zero out the entire vector register for both float and double + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); + } + else + { + // Get a temp integer register to compute long address. + // regNumber addrReg = tree->GetSingleTempReg(); + + // We must load the FP constant from the constant pool + // Emit a data section constant for the float or double constant. + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + + // Load the FP constant. + assert(targetReg >= REG_F0); + + instruction ins = size == EA_4BYTE ? INS_fld_s : INS_fld_d; + + // Compute the address of the FP constant and load the data. + emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +// Produce code for a GT_INC_SATURATE node. +void CodeGen::genCodeForIncSaturate(GenTree* tree) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + genConsumeOperands(treeNode); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + assert(!varTypeIsFloating(targetType)); + + // op1 and op2 can only be a reg at present, will amend in the future. + assert(!op1->isContained()); + assert(!op2->isContained()); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + if (EA_SIZE(attr) == EA_8BYTE) + { + instruction ins = isUnsigned ? 
INS_mulh_du : INS_mulh_d; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + else + { + assert(EA_SIZE(attr) == EA_4BYTE); + instruction ins = isUnsigned ? INS_mulh_wu : INS_mulh_w; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + + genProduceReg(treeNode); +} + +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR +// This method is expected to have called genConsumeOperands() before calling it. +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + instruction ins = genGetInsForOper(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) + { + var_types targetType = varDsc->GetRegisterType(tree); + // if (tree->gtFlags & GTF_UNSIGNED) + // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. + emitter* emit = GetEmitter(); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + // record the offset + unsigned offset = tree->GetLclOffs(); + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
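+ // (The assert that follows checks this for locals that are normalized on store.)
+ // The remainder of this function consumes the source operand, selects the register to
+ // store from, emits the stack store, and marks the local as living on the stack (REG_STK).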
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else if (data->isContained()) + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); + + genUpdateLife(tree); + + varDsc->SetRegNum(REG_STK); +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// lclNode - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) +{ + GenTree* data = lclNode->gtOp1; + + // var = call, where call returns a multi-reg return value + // case is handled separately. + if (data->gtSkipReloadOrCopy()->IsMultiRegNode()) + { + genMultiRegCallStoreToLocal(lclNode); + return; + } + + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); + + unsigned varNum = lclNode->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + var_types targetType = varDsc->GetRegisterType(lclNode); + + if (lclNode->IsMultiReg()) + { + regNumber operandReg = genConsumeReg(data); + unsigned int regCount = varDsc->lvFieldCnt; + for (unsigned i = 0; i < regCount; ++i) + { + NYI("unimplemented on LOONGARCH64 yet"); + regNumber varReg = lclNode->GetRegByIndex(i); + assert(varReg != REG_NA); + unsigned fieldLclNum = varDsc->lvFieldLclStart + i; + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum); + assert(fieldVarDsc->TypeGet() == TYP_FLOAT); + GetEmitter()->emitIns_R_R_I(INS_st_d, emitTypeSize(TYP_FLOAT), varReg, operandReg, i); + } + genProduceReg(lclNode); + } + else + { +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (lclNode->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(lclNode); + return; + } +#endif // FEATURE_SIMD + + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContained()) + { + // This is only possible for a zero-init or bitcast. + const bool zeroInit = data->IsIntegralConst(0); + // TODO-LOONGARCH64-CQ: not supporting SIMD. 
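+ // Three contained cases are handled below: an integral zero (stored from R0), another
+ // integral constant (materialized into R21 first), and a BITCAST (stored from its
+ // source register).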
+ assert(!varTypeIsSIMD(targetType)); + + if (zeroInit) + { + dataReg = REG_R0; + } + else if (data->IsIntegralConst()) + { + ssize_t imm = data->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + dataReg = REG_R21; + } + else + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(lclNode); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + emitAttr attr = emitActualTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + + genUpdateLife(lclNode); + + varDsc->SetRegNum(REG_STK); + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); + } + genProduceReg(lclNode); + } + } +} + +//------------------------------------------------------------------------ +// genSimpleReturn: Generates code for simple return statement for loongarch64. +// +// Note: treeNode's and op1's registers are already consumed. +// +// Arguments: +// treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type +// +// Return Value: +// None +// +void CodeGen::genSimpleReturn(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + GenTree* op1 = treeNode->gtGetOp1(); + var_types targetType = treeNode->TypeGet(); + + assert(targetType != TYP_STRUCT); + assert(targetType != TYP_VOID); + + regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? 
REG_FLOATRET : REG_INTRET; + + bool movRequired = (op1->GetRegNum() != retReg); + + if (!movRequired) + { + if (op1->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + bool isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate(); + if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) + { + // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR + + unsigned lclNum = lcl->GetLclNum(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + var_types op1Type = genActualType(op1->TypeGet()); + var_types lclType = genActualType(varDsc->TypeGet()); + + if (genTypeSize(op1Type) < genTypeSize(lclType)) + { + movRequired = true; + } + } + } + } + if (movRequired) + { + emitAttr attr = emitActualTypeSize(targetType); + if (varTypeUsesFloatArgReg(treeNode)) + { + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum()); + } + } + else + { + if (attr == EA_4BYTE) + { + if (treeNode->gtFlags & GTF_UNSIGNED) + { + GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0); + } + } + else + GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, op1->GetRegNum(), 0); + } + } +} + +/*********************************************************************************************** + * Generate code for localloc + */ +void CodeGen::genLclHeap(GenTree* tree) +{ + assert(tree->OperGet() == GT_LCLHEAP); + assert(compiler->compLocallocUsed); + + emitter* emit = GetEmitter(); + GenTree* size = tree->AsOp()->gtOp1; + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + + regNumber targetReg = tree->GetRegNum(); + regNumber regCnt = REG_NA; + regNumber pspSymReg = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; // can optimize for loongarch. + unsigned stackAdjustment = 0; + const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1; + target_ssize_t lastTouchDelta = + ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed. + + noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + // compute the amount of memory to allocate to properly STACK_ALIGN. + size_t amount = 0; + if (size->IsCnsIntOrI()) + { + // If size is a constant, then it must be contained. + assert(size->isContained()); + + // If amount is zero then return null in targetReg + amount = size->AsIntCon()->gtIconVal; + if (amount == 0) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); + goto BAILOUT; + } + + // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN + amount = AlignUp(amount, STACK_ALIGN); + } + else + { + // If 0 bail out by returning null in targetReg + genConsumeRegAndCopy(size, targetReg); + endLabel = genCreateTempLabel(); + emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); + + // Compute the size of the block to allocate and perform alignment. + // If compInitMem=true, we can reuse targetReg as regcnt, + // since we don't need any internal registers. 
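+ // After regCnt is selected below, the requested size is rounded up to STACK_ALIGN using
+ // (regCnt + (STACK_ALIGN - 1)) & ~(STACK_ALIGN - 1), with the mask materialized into R21.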
+ if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + if (regCnt != targetReg) + { + emit->emitIns_R_R_I(INS_ori, easz, regCnt, targetReg, 0); + } + } + + // Align to STACK_ALIGN + // regCnt will be the total number of bytes to localloc + inst_RV_IV(INS_addi_d, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); + + assert(regCnt != REG_R21); + ssize_t imm2 = ~(STACK_ALIGN - 1); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, imm2); + emit->emitIns_R_R_R(INS_and, emitActualTypeSize(type), regCnt, regCnt, REG_R21); + } + + // If we have an outgoing arg area then we must adjust the SP by popping off the + // outgoing arg area. We will restore it right before we return from this method. + // + // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following + // are the cases that need to be handled: + // i) Method has out-going arg area. + // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). + // Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc + // space. + // ii) Method has no out-going arg area. + // Nothing to pop off from the stack. + if (compiler->lvaOutgoingArgSpaceSize > 0) + { + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // // aligned + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, rsGetRsvdReg()); + stackAdjustment += outgoingArgSpaceAligned; + } + + if (size->IsCnsIntOrI()) + { + // We should reach here only for non-zero, constant size allocations. + assert(amount > 0); + ssize_t imm = -16; + + // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes. + static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); + assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time + size_t stpCount = amount / (REGSIZE_BYTES * 2); + if (compiler->info.compInitMem) + { + if (stpCount <= 4) + { + imm = -16 * stpCount; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + imm = -imm; + while (stpCount != 0) + { + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + stpCount -= 1; + } + + lastTouchDelta = 0; + + goto ALLOC_DONE; + } + } + else if (amount < compiler->eeGetPageSize()) // must be < not <= + { + // Since the size is less than a page, simply adjust the SP value. + // The SP might already be in the guard page, so we must touch it BEFORE + // the alloc, not after. + + // ld_w r0, 0(SP) + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0); + + lastTouchDelta = amount; + imm = -(ssize_t)amount; + if (emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, rsGetRsvdReg(), amount); + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rsGetRsvdReg()); + } + + goto ALLOC_DONE; + } + + // else, "mov regCnt, amount" + // If compInitMem=true, we can reuse targetReg as regcnt. + // Since size is a constant, regCnt is not yet initialized. 
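+ // genSetRegToIcon below loads the constant size into regCnt; TYP_INT is used when the
+ // value fits in an unsigned 32-bit quantity, TYP_LONG otherwise.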
+ assert(regCnt == REG_NA); + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + } + genSetRegToIcon(regCnt, amount, ((unsigned int)amount == amount) ? TYP_INT : TYP_LONG); + } + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the total number of bytes to locAlloc. + // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid + // by tickling the pages, we will just push 0's on the stack. + // + // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 + // and localloc size is a multiple of STACK_ALIGN. + + // Loop: + ssize_t imm = -16; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); + + // If not done, loop + // Note that regCnt is the number of bytes to stack allocate. + // Therefore we need to subtract 16 from regcnt here. + assert(genIsValidIntReg(regCnt)); + + emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16); + + assert(imm == (-4 << 2)); // goto loop. + emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); + + lastTouchDelta = 0; + } + else + { + // At this point 'regCnt' is set to the total number of bytes to localloc. + // + // We don't need to zero out the allocated memory. However, we do have + // to tickle the pages to ensure that SP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case SP is on the last byte of the guard page. Thus you must + // touch SP-0 first not SP-0x1000. + // + // This is similar to the prolog code in CodeGen::genAllocLclFrame(). + // + // Note that we go through a few hoops so that SP never points to + // illegal pages at any time during the tickling process. + // + // sltu R21, SP, regCnt + // sub_d regCnt, SP, regCnt // regCnt now holds ultimate SP + // masknez regCnt, regCnt, R21 // Overflow, pick lowest possible value + // + // lu12i_w regTmp, eeGetPageSize()>>12 + // Loop: + // ld_w r0, 0(SP) // tickle the page - read from the page + // sub_d R21, SP, regTmp // decrement SP by eeGetPageSize() + // bltu R21, regCnt, Done + // sub_d SP, SP,regTmp + // b Loop + // + // Done: + // mov SP, regCnt + // + + // Setup the regTmp + regNumber regTmp = tree->GetSingleTempReg(); + + assert(regCnt != REG_R21); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, REG_R21, REG_SPBASE, regCnt); + + //// dsubu regCnt, SP, regCnt // regCnt now holds ultimate SP + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); + + // Overflow, set regCnt to lowest possible value + emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21); + + assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12)); + emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12); + + // genDefineTempLabel(loop); + + // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0); + + // decrement SP by eeGetPageSize() + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_R21, REG_SPBASE, regTmp); + + assert(regTmp != REG_R21); + + ssize_t imm = 3 << 2; // goto done. 
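+ // 3 << 2 == 12 bytes forward from the bltu: skip the SP decrement and the backward
+ // branch, landing on the final "ori SP, regCnt, 0" that corresponds to the Done label
+ // in the pseudo-code above.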
+ emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm); + + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); + + imm = -4 << 2; + // Jump to loop and tickle new stack address + emit->emitIns_I(INS_b, EA_PTRSIZE, imm); + + // Done with stack tickle loop + // genDefineTempLabel(done); + + // Now just move the final value to SP + emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); + + // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, + // we're going to assume the worst and probe. + } + +ALLOC_DONE: + // Re-adjust SP to allocate outgoing arg area. We must probe this adjustment. + if (stackAdjustment != 0) + { + assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned + assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0)); + + const regNumber tmpReg = rsGetRsvdReg(); + + if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || + (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > + compiler->eeGetPageSize())) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tmpReg); + } + else + { + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, tmpReg); + } + + // Return the stackalloc'ed address in result register. + // TargetReg = SP + stackAdjustment. + // + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tmpReg); + } + else // stackAdjustment == 0 + { + // Move the final value of SP to targetReg + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0); + } + +BAILOUT: + if (endLabel != nullptr) + genDefineTempLabel(endLabel); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForNegNot(GenTree* tree) +{ + assert(tree->OperIs(GT_NEG, GT_NOT)); + + var_types targetType = tree->TypeGet(); + + assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); + + regNumber targetReg = tree->GetRegNum(); + instruction ins = genGetInsForOper(tree); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTree* operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + emitAttr attr = emitActualTypeSize(tree); + GetEmitter()->emitIns_R_R(ins, attr, targetReg, operandReg); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForBswap(GenTree* tree) +{ + NYI_LOONGARCH64("genCodeForBswap unimpleement yet"); +} + +//------------------------------------------------------------------------ +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. +// (1) float/double MOD is morphed into a helper call by front-end. 
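+// (2) For the integer forms, the divide-by-zero check (and, for signed DIV/MOD, the
+//     MinInt / -1 overflow check) is emitted inline below.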
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); + + var_types targetType = tree->TypeGet(); + emitter* emit = GetEmitter(); + + genConsumeOperands(tree); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + assert(varTypeIsFloating(tree->gtOp1)); + assert(varTypeIsFloating(tree->gtOp2)); + assert(tree->gtOper == GT_DIV); + // genCodeForBinary(tree); + instruction ins = genGetInsForOper(tree); + emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); + } + else // an integer divide operation + { + GenTree* divisorOp = tree->gtGetOp2(); + // divisorOp can be immed or reg + assert(!divisorOp->isContained() || divisorOp->isContainedIntOrIImmed()); + + if (divisorOp->IsIntegralConst(0) || divisorOp->GetRegNum() == REG_R0) + { + // We unconditionally throw a divide by zero exception + genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); + } + else // the divisor is not the constant zero + { + GenTree* src1 = tree->gtOp1; + unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet())); + emitAttr size = EA_ATTR(TypeSize); + + assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) && + TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); + + // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + regNumber Reg1 = src1->GetRegNum(); + regNumber divisorReg = divisorOp->GetRegNum(); + instruction ins; + + // Check divisorOp first as we can always allow it to be a contained immediate + if (divisorOp->isContainedIntOrIImmed()) + { + ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); + divisorReg = REG_R21; + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (tree->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!divisorOp->isContainedIntOrIImmed()); + ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); + Reg1 = REG_R21; + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + + // Generate the require runtime checks for GT_DIV or GT_UDIV + if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD) + { + // Two possible exceptions: + // (AnyVal / 0) => DivideByZeroException + // (MinInt / -1) => ArithmeticException + // + bool checkDividend = true; + + // Do we have an immediate for the 'divisorOp'? + // + if (divisorOp->IsCnsIntOrI()) + { + ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + // assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + if (intConstValue != -1) + { + checkDividend = false; // We statically know that the dividend is not -1 + } + } + else // insert check for divison by zero + { + // Check if the divisor is zero throw a DivideByZeroException + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (checkDividend) + { + // Check if the divisor is not -1 branch to 'sdivLabel' + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); + BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64. 
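The overflow check emitted just below detects the (MinInt / -1) case without materializing MinInt: once the dividend is known to be non-zero, doubling it with add_w/add_d wraps to zero only for MinInt. A sketch of the 32-bit case (illustrative, not part of the patch):

    #include <cstdint>
    static bool DivWouldOverflow32(int32_t dividend, int32_t divisor)
    {
        if (divisor != -1 || dividend == 0)          // the bne/beq branches to sdivLabel skip the check
            return false;
        uint32_t doubled = (uint32_t)dividend * 2u;  // add_w $r21, dividend, dividend (wrap-around)
        return doubled == 0;                         // beq $r21, $zero -> throw SCK_ARITH_EXCPN
    }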
+ emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg); + + // If control flow continues past here the 'divisorReg' is known to be -1 + regNumber dividendReg = tree->gtGetOp1()->GetRegNum(); + // At this point the divisor is known to be -1 + // + // Wether dividendReg is MinInt or not + // + + emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0); + + emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg, + dividendReg); + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21); + genDefineTempLabel(sdivLabel); + } + + // Generate the sdiv instruction + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_div_w; + } + else + { + ins = INS_mod_w; + } + } + else + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_div_d; + } + else + { + ins = INS_mod_d; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + else // if (tree->gtOper == GT_UDIV) GT_UMOD + { + // Only one possible exception + // (AnyVal / 0) => DivideByZeroException + // + // Note that division by the constant 0 was already checked for above by the + // op2->IsIntegralConst(0) check + // + + if (!divisorOp->IsCnsIntOrI()) + { + // divisorOp is not a constant, so it could be zero + // + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_div_wu; + } + else + { + ins = INS_mod_wu; + } + + // TODO-LOONGARCH64: here is just for signed-extension ? + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); + } + else + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_div_du; + } + else + { + ins = INS_mod_du; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + } + } + genProduceReg(tree); +} + +// Generate code for InitBlk by performing a loop unroll +// Preconditions: +// a) Both the size and fill byte value are integer constants. +// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. 
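As a worked example of the unrolling below, assume REGSIZE_BYTES == 8 and a hypothetical 23-byte layout: the first loop emits one pair of st_d stores at offsets 0 and 8, and the tail loop then emits st_w at 16, st_h at 20 and st_b at 22. A sketch of the same plan:

    #include <cstdio>
    static void PrintInitBlkUnrollPlan(unsigned size)
    {
        unsigned offset = 0;
        for (unsigned chunk = 16; size >= chunk; size -= chunk, offset += chunk)
            printf("st_d at %u and %u\n", offset, offset + 8); // paired 8-byte stores
        for (unsigned chunk = 8; size > 0; size -= chunk, offset += chunk)
        {
            while (chunk > size)
                chunk /= 2;                                    // 8 -> 4 -> 2 -> 1, as in the tail loop
            printf("%u-byte store at %u\n", chunk, offset);
        }
    }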
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) +{ + assert(node->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + regNumber srcReg; + GenTree* src = node->Data(); + + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->gtGetOp1(); + } + + if (!src->isContained()) + { + srcReg = genConsumeReg(src); + } + else + { + assert(src->IsIntegralConst(0)); + srcReg = REG_R0; + } + + if (node->IsVolatile()) + { + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + unsigned size = node->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(dstOffset < INT32_MAX - static_cast<int>(size)); + + for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) + { + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset); + emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset + 8); + } + } + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + instruction storeIns; + emitAttr attr; + + switch (regSize) + { + case 1: + storeIns = INS_st_b; + attr = EA_4BYTE; + break; + case 2: + storeIns = INS_st_h; + attr = EA_4BYTE; + break; + case 4: + storeIns = INS_st_w; + attr = EA_ATTR(regSize); + break; + case 8: + storeIns = INS_st_d; + attr = EA_ATTR(regSize); + break; + default: + unreached(); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, srcReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset); + } + } +} + +// Generate code for CpObj nodes which copy structs that have interleaved +// GC pointers. +// For this case we'll generate a sequence of loads/stores in the case of struct +// slots that don't contain GC pointers. 
The generated code will look like: +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// +// In the case of a GC-Pointer we'll call the ByRef write barrier helper +// who happens to use the same registers as the previous call to maintain +// the same register requirements and register killsets: +// bl CORINFO_HELP_ASSIGN_BYREF +// +// So finally an example would look like this: +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// bl CORINFO_HELP_ASSIGN_BYREF +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// bl CORINFO_HELP_ASSIGN_BYREF +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + GenTree* dstAddr = cpObjNode->Addr(); + GenTree* source = cpObjNode->Data(); + var_types srcAddrType = TYP_BYREF; + bool sourceIsLocal = false; + + assert(source->isContained()); + if (source->gtOper == GT_IND) + { + GenTree* srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + srcAddrType = srcAddr->TypeGet(); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + } + + bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); + +#ifdef DEBUG + assert(!dstAddr->isContained()); + + // This GenTree node has data about GC pointers, this means we're dealing + // with CpObj. + assert(cpObjNode->GetLayout()->HasGCPtr()); +#endif // DEBUG + + // Consume the operands and get them into the right registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); + + ClassLayout* layout = cpObjNode->GetLayout(); + unsigned slots = layout->GetSlotCount(); + + // Temp register(s) used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = REG_NA; + + assert(genIsValidIntReg(tmpReg)); + assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); + assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); + + if (slots > 1) + { + tmpReg2 = cpObjNode->GetSingleTempReg(); + assert(tmpReg2 != tmpReg); + assert(genIsValidIntReg(tmpReg2)); + assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); + assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile CpObj operation + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + + emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType); + emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet()); + + // If we can prove it's on the stack we don't need to use the write barrier. 
+ if (dstOnStack) + { + unsigned i = 0; + // Check if two or more remaining slots and use two ld/sd sequence + while (i < slots - 1) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + emitAttr attr1 = emitTypeSize(layout->GetGCPtrType(i + 1)); + + emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE); + i += 2; + } + + // Use a ld/sd sequence for the last remainder + if (i < slots) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + + emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + TARGET_POINTER_SIZE); + } + } + else + { + unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); + + unsigned i = 0; + while (i < slots) + { + if (!layout->IsGCPtr(i)) + { + // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd + if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + ++i; // extra increment of i, since we are copying two items + } + else + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + } + } + else + { + // In the case of a GC-Pointer we'll call the ByRef write barrier helper + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + gcPtrCount--; + } + ++i; + } + assert(gcPtrCount == 0); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a INS_BARRIER_RMB after a volatile CpObj operation + // TODO-LOONGARCH64: there is only BARRIER_FULL for LOONGARCH64. + instGen_MemoryBarrier(BARRIER_FULL); + } + + // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. + // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. 
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); +} + +// generate code do a switch statement based on a table of ip-relative offsets +void CodeGen::genTableBasedSwitch(GenTree* treeNode) +{ + genConsumeOperands(treeNode->AsOp()); + regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum(); + regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum(); + + regNumber tmpReg = treeNode->GetSingleTempReg(); + + // load the ip-relative offset (which is relative to start of fgFirstBB) + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, idxReg, 2); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_8BYTE, baseReg, baseReg, REG_R21); + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); + + // add it to the absolute address of fgFirstBB + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); + + // jr baseReg + GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, baseReg, 0); +} + +// emits the table and an instruction to get the address of the first element +void CodeGen::genJumpTable(GenTree* treeNode) +{ + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + assert(treeNode->OperGet() == GT_JMPTABLE); + + unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; + BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; + unsigned jmpTabOffs; + unsigned jmpTabBase; + + jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true); + + jmpTabOffs = 0; + + JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase); + + for (unsigned i = 0; i < jumpCount; i++) + { + BasicBlock* target = *jumpTable++; + noway_assert(target->bbFlags & BBF_HAS_LABEL); + + JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum); + + GetEmitter()->emitDataGenData(i, target); + }; + + GetEmitter()->emitDataGenEnd(); + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + GetEmitter()->emitIns_R_C(INS_bl, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA, + compiler->eeFindJitDataOffs(jmpTabBase), 0); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. +// +// Arguments: +// treeNode - the GT_XADD/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
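The table emitted by genJumpTable above holds, for each case, a 32-bit offset of the target block relative to the first basic block, so the slli_d/add_d/ld_w sequence in genTableBasedSwitch plus the final add_d/jirl amounts to the following host-side sketch (illustrative, not part of the patch):

    #include <cstdint>
    static uintptr_t SwitchTargetSketch(uintptr_t firstBlockAddr, const int32_t* jumpTable, uint64_t index)
    {
        int32_t offset = jumpTable[index];        // slli_d + add_d to index the table, ld_w to fetch
        return firstBlockAddr + (intptr_t)offset; // add_d with the fgFirstBB address, then jirl $zero, baseReg, 0
    }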
+// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +static inline bool isImmed(GenTree* treeNode) +{ + assert(treeNode->OperIsBinary()); + + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) + { + return true; + } + + return false; +} + +instruction CodeGen::genGetInsForOper(GenTree* treeNode) +{ + var_types type = treeNode->TypeGet(); + genTreeOps oper = treeNode->OperGet(); + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2; + emitAttr attr = emitActualTypeSize(treeNode); + bool isImm = false; + + instruction ins = INS_break; + + if (varTypeIsFloating(type)) + { + switch (oper) + { + case GT_ADD: + if (attr == EA_4BYTE) + { + ins = INS_fadd_s; + } + else + { + ins = INS_fadd_d; + } + break; + case GT_SUB: + if (attr == EA_4BYTE) + { + ins = INS_fsub_s; + } + else + { + ins = INS_fsub_d; + } + break; + case GT_MUL: + if (attr == EA_4BYTE) + { + ins = INS_fmul_s; + } + else + { + ins = INS_fmul_d; + } + break; + case GT_DIV: + if (attr == EA_4BYTE) + { + ins = INS_fdiv_s; + } + else + { + ins = INS_fdiv_d; + } + break; + case GT_NEG: + if (attr == EA_4BYTE) + { + ins = INS_fneg_s; + } + else + { + ins = INS_fneg_d; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - float"); + unreached(); + break; + } + } + else + { + switch (oper) + { + case GT_ADD: + isImm = isImmed(treeNode); + if (isImm) + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_addi_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addi_w; + } + } + else + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_add_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_add_w; + } + } + break; + + case GT_SUB: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_sub_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_sub_w; + } + break; + + case GT_MOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_w; + } + break; + + case GT_DIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_w; + } + break; + + case GT_UMOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_wu; + } + break; + + case GT_UDIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_wu; + } + break; + + case GT_MUL: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + op2 = treeNode->gtGetOp2(); + if (genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) + ins = treeNode->IsUnsigned() ? INS_mulw_d_wu : INS_mulw_d_w; + else + ins = INS_mul_d; + } + else + { + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + ins = INS_mulw_d_wu; + else + ins = INS_mul_w; + } + break; + + case GT_NEG: + if (attr == EA_8BYTE) + { + ins = INS_dneg; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_neg; + } + break; + + case GT_NOT: + ins = INS_not; + break; + + case GT_AND: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_andi; + } + else + { + ins = INS_and; + } + break; + + case GT_AND_NOT: + assert(!isImmed(treeNode)); + ins = INS_andn; + break; + + case GT_OR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_ori; + } + else + { + ins = INS_or; + } + break; + + case GT_LSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. 
+ if (attr == EA_4BYTE) + { + ins = INS_slli_w; + } + else + { + ins = INS_slli_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sll_w; + } + else + { + ins = INS_sll_d; + } + } + break; + + case GT_RSZ: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_srli_w; + } + else + { + ins = INS_srli_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_srl_w; + } + else + { + ins = INS_srl_d; + } + } + break; + + case GT_RSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_srai_w; + } + else + { + ins = INS_srai_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sra_w; + } + else + { + ins = INS_sra_d; + } + } + break; + + case GT_ROR: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_rotri_w; + } + else + { + ins = INS_rotri_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_rotr_w; + } + else + { + ins = INS_rotr_d; + } + } + break; + + case GT_XOR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_xori; + } + else + { + ins = INS_xor; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - integer"); + unreached(); + break; + } + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. +// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + BasicBlock* skipLabel = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); + + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + if (addr == nullptr) + { + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + // TODO-LOONGARCH64: maybe optimize further. + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(callTarget); + } + else + { + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + } + + // TODO-LOONGARCH64: can optimize further !!! 
+ GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + genDefineTempLabel(skipLabel); + + regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + regSet.verifyRegistersUsed(killMask); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ +#ifdef FEATURE_SIMD + // Storing Vector3 of size 12 bytes through indirection + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, + // as that is where 'addr' must go. + noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF); + + // 'addr' goes into REG_T6 (REG_WRITE_BARRIER_DST) + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); + + // 'data' goes into REG_T7 (REG_WRITE_BARRIER_SRC) + genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. + genConsumeAddress(addr); + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + regNumber dataReg; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else // data is not contained, so evaluate it into a register + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + // issue a full memory barrier before a volatile StInd + instGen_MemoryBarrier(); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------ +// genIntToFloatCast: Generate code to cast an int/long to float/double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType= int32/uint32/int64/uint64 and DstType=float/double. 
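For the unsigned 8-byte source handled below, the value may not fit a signed convert, so the emitted code halves it while keeping a sticky low bit, converts, and doubles the result. A C++ equivalent of that path (a sketch assuming round-to-nearest; not part of the patch):

    #include <cstdint>
    static double UInt64ToDoubleSketch(uint64_t x)
    {
        if ((int64_t)x >= 0)
            return (double)(int64_t)x;         // top bit clear: plain ffint_d_l (the bge skips the fixup)
        uint64_t half = (x >> 1) | (x & 1);    // srli_d + andi + or: halve, keep a sticky bit
        double   d    = (double)(int64_t)half; // ffint_d_l on the now-positive value
        return d + d;                          // fadd_d result, result: scale back up by two
    }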
+// +void CodeGen::genIntToFloatCast(GenTree* treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(dstType); + + // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); + + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; + + genConsumeOperands(treeNode->AsOp()); + + if (IsUnsigned) + { + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 + + if (srcSize == EA_8BYTE) + { + ssize_t imm = 4 << 2; + emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); + + emit->emitIns_R_R_I(INS_andi, EA_8BYTE, REG_R21, op1->GetRegNum(), 1); + emit->emitIns_R_R_I(INS_srli_d, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), 1); + emit->emitIns_R_R_R(INS_or, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), REG_R21); + } + else + { + srcSize = EA_8BYTE; + emit->emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, op1->GetRegNum(), REG_R0, 63, 32); + } + } + + ins = srcSize == EA_8BYTE ? INS_movgr2fr_d : INS_movgr2fr_w; + emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), op1->GetRegNum()); + + if (dstType == TYP_DOUBLE) + { + if (srcSize == EA_4BYTE) + { + ins = INS_ffint_d_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_ffint_d_l; + } + } + else + { + assert(dstType == TYP_FLOAT); + if (srcSize == EA_4BYTE) + { + ins = INS_ffint_s_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_ffint_s_l; + } + } + + emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), treeNode->GetRegNum()); + + if (IsUnsigned) + { + srcSize = EA_ATTR(genTypeSize(srcType)); + emit->emitIns_R_R(INS_movfr2gr_d, attr, op1->GetRegNum(), REG_SCRATCH_FLT); // recover op1 + + if (srcSize == EA_8BYTE) + { + ssize_t imm = 3 << 2; + emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); + + emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, + treeNode->GetRegNum()); + emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), + REG_SCRATCH_FLT, treeNode->GetRegNum()); + } + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToIntCast: Generate code to cast float/double to int/long +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType=float/double and DstType= int32/uint32/int64/uint64 +// +void CodeGen::genFloatToIntCast(GenTree* treeNode) +{ + // we don't expect to see overflow detecting float/double --> int type conversions here + // as they should have been converted into helper calls by front-end. 
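The unsigned destinations below are handled by comparing against 2^31 or 2^63 (the lu12i_w/lu52i_d immediates 0x4f000/0x5f000/0x41e00/0x43e00 build exactly those constants), subtracting the threshold when needed, truncating, and OR-ing the top bit back in. A sketch of the double-to-uint64 path (illustrative; assumes the input is in range):

    #include <cstdint>
    static uint64_t DoubleToUInt64Sketch(double d)
    {
        const double two63 = 9223372036854775808.0;           // 0x43E0000000000000, built with lu52i_d
        if (d < two63)                                         // fcmp_clt_d sets cc; bcnez skips the fixup
            return (uint64_t)(int64_t)d;                       // ftintrz_l_d + movfr2gr_d
        return (uint64_t)(int64_t)(d - two63) | (1ull << 63);  // fsub_d, ftintrz_l_d, then or with $r21
    }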
+ assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); + + // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE + // For conversions to small types (byte/sbyte/int16/uint16) from float/double, + // we expect the front-end or lowering phase to have generated two levels of cast. + // + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); + noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); + + instruction ins1 = INS_invalid; + instruction ins2 = INS_invalid; + bool IsUnsigned = varTypeIsUnsigned(dstType); + + regNumber tmpReg = REG_SCRATCH_FLT; + assert(tmpReg != op1->GetRegNum()); + + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + ins1 = INS_ftintrz_w_d; + ins2 = INS_movfr2gr_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins1 = INS_ftintrz_l_d; + ins2 = INS_movfr2gr_d; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + ins1 = INS_ftintrz_w_s; + ins2 = INS_movfr2gr_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins1 = INS_ftintrz_l_s; + ins2 = INS_movfr2gr_d; + } + } + + genConsumeOperands(treeNode->AsOp()); + + if (IsUnsigned) + { + ssize_t imm = 0; + + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + imm = 0x41e00; + } + else + { + imm = 0x43e00; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + imm = 0x4f000; + } + else + { + imm = 0x5f000; + } + } + + if (srcType == TYP_DOUBLE) + GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8); + else + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); + + GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21); + + GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), + tmpReg, 2); + + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0); + GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2); + + GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), + tmpReg); + + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1); + GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, + dstSize == EA_8BYTE ? 63 : 31); + + GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2); + + GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, tmpReg); + GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg); + + GetEmitter()->emitIns_R_R_R(INS_or, dstSize, treeNode->GetRegNum(), REG_R21, treeNode->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, op1->GetRegNum()); + GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCkfinite: Generate code for ckfinite opcode. +// +// Arguments: +// treeNode - The GT_CKFINITE node +// +// Return Value: +// None. 
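genCkfinite below tests for NaN or infinity by extracting the exponent field with bstrpick and XOR-ing it against the all-ones pattern; a zero result takes the ArithmeticException path. C++ equivalent for the double case (a sketch, not part of the patch):

    #include <cstdint>
    #include <cstring>
    static bool IsFiniteDoubleSketch(double d)
    {
        uint64_t bits;
        std::memcpy(&bits, &d, sizeof(bits));     // movfr2gr_d
        uint64_t exponent = (bits >> 52) & 0x7FF; // bstrpick_d intReg, intReg, 62, 52
        return (exponent ^ 0x7FF) != 0;           // xori; zero means NaN/Inf -> throw
    }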
+// +// Assumptions: +// GT_CKFINITE node has reserved an internal register. +// +void CodeGen::genCkfinite(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_CKFINITE); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. + int size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + int pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. + + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + + // Extract exponent into a register. + regNumber intReg = treeNode->GetSingleTempReg(); + regNumber fpReg = genConsumeReg(op1); + + emit->emitIns_R_R(attr == EA_8BYTE ? INS_movfr2gr_d : INS_movfr2gr_s, attr, intReg, fpReg); + + // Mask of exponent with all 1's and check if the exponent is all 1's + instruction ins = (targetType == TYP_FLOAT) ? INS_bstrpick_w : INS_bstrpick_d; + emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos + size - 1, pos); + emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask); + + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg); + + // if it is a finite value copy it to targetReg + if (treeNode->GetRegNum() != fpReg) + { + emit->emitIns_R_R(ins_Copy(targetType), attr, treeNode->GetRegNum(), fpReg); + } + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* jtree) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = nullptr; + regNumber targetReg; + if (jtree->OperIs(GT_JTRUE)) + { + tree = jtree->gtGetOp1()->AsOp(); + targetReg = REG_RA; + assert(tree->GetRegNum() == REG_NA); + + jtree->gtOp2 = (GenTree*)REG_RA; // targetReg + jtree->SetRegNum((regNumber)INS_bnez); + } + else + { + tree = jtree; + targetReg = tree->GetRegNum(); + } + assert(targetReg != REG_NA); + + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + if (varTypeIsFloating(op1Type)) + { + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if (IsUnordered) + { + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + } + else + { + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + } + + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); + } + else + { + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + assert(!op1->isContainedIntOrIImmed()); + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + switch (cmpSize) + { + case EA_4BYTE: + imm = static_cast<int32_t>(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast<int8_t>(imm); + break; + // case EA_2BYTE: + // imm = static_cast<int16_t>(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + if (tree->OperIs(GT_LT)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && emitter::isValidUimm11(imm)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_LE)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_GT)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + } + } + else if (tree->OperIs(GT_GE)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && emitter::isValidUimm11(imm)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + if (!imm) + { + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + } + else if (tree->OperIs(GT_EQ)) + { + if (!imm) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + + if ((cmpSize == EA_4BYTE) && IsUnsigned) + { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, regOp2, 0); + + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + } + + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generate code for a GT_JTRUE node. +// +// Arguments: +// jtrue - The node +// +void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; + + if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) + { + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, + (int)(int64_t)jtrue->gtOp2); // 5-bits; + jtrue->SetRegNum(REG_NA); + jtrue->gtOp2 = nullptr; + return; + } + else + { + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + bool IsEq = tree == jtrue->gtPrev; + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(targetReg == REG_NA); + int SaveCcResultReg = (int)REG_RA << 5; + + if (varTypeIsFloating(op1Type)) + { + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + // here default use cc = 1 for float comparing. + if (tree->OperIs(GT_EQ)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LT)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(1 /*cc*/); + jtrue->SetRegNum((regNumber)ins); + } + } + else + { + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + if (imm) + { + switch (cmpSize) + { + case EA_4BYTE: + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + { + imm = static_cast<uint32_t>(imm); + } + else + { + imm = static_cast<int32_t>(imm); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast<int8_t>(imm); + break; + + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + } + else + { + SaveCcResultReg = 0; + } + + if (tree->OperIs(GT_LT)) + { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) + { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) + { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) + { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? 
INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) + { + SaveCcResultReg |= ((int)regOp1); + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) + { + SaveCcResultReg |= ((int)regOp1); + ins = INS_beq; + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) + { + if (!(op1->gtFlags & GTF_UNSIGNED)) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + } + if (!(op2->gtFlags & GTF_UNSIGNED)) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp2 = REG_RA; + } + } + + if (tree->OperIs(GT_LT)) + { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) + { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_beq; + } + } + + if (IsEq) + { + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; + } + else + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpCompare: Generates code for jmpCompare statement. +// +// A GT_JCMP node is created when a comparison and conditional branch +// can be executed in a single instruction. +// +// LOONGARCH64 has a few instructions with this behavior. +// - beq/bne -- Compare and branch register equal/not equal +// +// The beq/bne supports the normal +/- 2^15 branch range for conditional branches +// +// A GT_JCMP beq/bne node is created when there is a GT_EQ or GT_NE +// integer/unsigned comparison against the value of Rt register which is used by +// a GT_JTRUE condition jump node. 
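In source terms the fused node corresponds to an equality or inequality test feeding a branch; for example (hypothetical C++ analogue), a compare against zero maps directly to beqz/bnez, while a compare against another constant first materializes it in $r21:

    static int JumpCompareSketch(long x)
    {
        if (x == 0)    // beqz x, <target>
            return 1;
        if (x != 1234) // emitIns_I_la $r21, 1234; bne x, $r21, <target>
            return 2;
        return 0;
    }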
+// +// This node is repsonsible for consuming the register, and emitting the +// appropriate fused compare/test and branch instruction +// +// Two flags guide code generation +// GTF_JCMP_EQ -- Set if this is beq rather than bne +// +// Arguments: +// tree - The GT_JCMP tree node. +// +// Return Value: +// None +// +void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + assert(op2->IsCnsIntOrI()); + assert(op2->isContained()); + + genConsumeOperands(tree); + + regNumber reg = op1->GetRegNum(); + emitAttr attr = emitActualTypeSize(op1->TypeGet()); + + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) + { + assert(reg != REG_R21); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if (attr == EA_4BYTE) + { + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + regs = (int)reg << 5; + regs |= (int)REG_R21; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; + } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; +} + +//--------------------------------------------------------------------- +// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer +// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining). +// +int CodeGenInterface::genSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + + int delta = compiler->lvaOutgoingArgSpaceSize; + + assert(delta >= 0); + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + // For varargs functions, we home all the incoming register arguments. They are not + // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but + // since we don't use "push" instructions to save them, we don't have to do the + // save of these varargs register arguments as the first thing in the prolog. + + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize > 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! + +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. 
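A worked example of how the three deltas relate, using hypothetical sizes (outgoing arg area 32 bytes, total frame 96 bytes): SP-to-FP is +32, Caller-SP-to-Initial-SP is -96, and Caller-SP-to-FP is their sum, -64. A sketch mirroring the accessors in this section:

    static int SpToFpDeltaSketch(int outgoingArgSpace)       { return outgoingArgSpace; } // +32
    static int CallerSpToInitialSpSketch(int totalFrameSize) { return -totalFrameSize; }  // -96
    static int CallerSpToFpDeltaSketch(int outgoingArgSpace, int totalFrameSize)
    {
        return CallerSpToInitialSpSketch(totalFrameSize) + SpToFpDeltaSketch(outgoingArgSpace); // -64
    }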
+// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} + +/***************************************************************************** + * Emit a call to a helper function. + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + + if (addr == nullptr) + { + // This is call to a runtime helper. + // li reg, pAddr #NOTE: this maybe muti-instructions. + // ld_d reg, reg + // jirl reg + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + regMaskTP callTargetMask = genRegMask(callTargetReg); + regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + // assert that all registers in callTargetMask are in the callKillSet + noway_assert((callTargetMask & callKillSet) == callTargetMask); + + callTarget = callTargetReg; + + if (compiler->opts.compReloc) + { + // TODO-LOONGARCH64: here the bl is special flag rather than a real instruction. + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(callTarget); + + callType = emitter::EC_INDIR_R; + } + + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regSet.verifyRegistersUsed(killMask); +} + +#ifdef FEATURE_SIMD + +//------------------------------------------------------------------------ +// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main +// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// Currently, we only recognize SIMDVector and SIMDVector, and +// a limited set of methods. +// +// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. 
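When the helper address is not known at JIT time, the sequence above materializes pAddr (the address of an indirection cell) with lu12i_w/lu32i_d, loads the real entry point through it with ldptr_d, and calls indirectly via EC_INDIR_R. What that amounts to at run time (a sketch with a hypothetical helper signature, not part of the patch):

    #include <cstdint>
    typedef void (*HelperFnSketch)();                  // hypothetical signature
    static void CallHelperViaCellSketch(uintptr_t pAddr)
    {
        HelperFnSketch fn = *(HelperFnSketch*)pAddr;   // ldptr_d callTarget, callTarget, lo12 >> 2
        fn();                                          // emitIns_Call with EC_INDIR_R (jirl through callTarget)
    }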
+void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) +{ + NYI("unimplemented on LOONGARCH64 yet"); + return INS_OPTS_NONE; +} + +// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic +// +// Arguments: +// intrinsicId - SIMD intrinsic Id +// baseType - Base type of the SIMD vector +// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode +// +// +// Return Value: +// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. +// +instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) +{ + NYI("unimplemented on LOONGARCH64 yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------------- +// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes +// a number of arguments equal to the length of the Vector. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//---------------------------------------------------------------------------------- +// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// The Widen intrinsics are broken into separate intrinsics for the two results. +// +void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// This intrinsic takes two arguments. The first operand is narrowed to produce the +// lower elements of the results, and the second operand produces the high elements. +// +void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations +// add, sub, mul, bit-wise And, AndNot and Or. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operater +// == and != +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// The upper half of all SIMD registers are volatile, even the callee-save registers. +// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic +// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save +// register. If such a register cannot be found, it will save it to an available caller-save register. +// In that case, this node will be marked GTF_SPILL, which will cause this method to save +// the upper half to the lclVar's home location. +// +void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always +// have their home register, this node has its targetReg on the lclVar child, and its source +// on the simdNode. +// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled +// an upper-half to the lclVar's home location, this node will be marked GTF_SPILLED. +// +void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. 
+// Since Vector3 is not a hardware supported write size, it is performed +// as two writes: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store indirect +// +// +// Return Value: +// None. +// +void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. +// Since Vector3 is not a hardware supported write size, it is performed +// as two loads: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node of GT_IND +// +// +// Return Value: +// None. +// +void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +#endif // FEATURE_SIMD + +/***************************************************************************** + * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog + * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late + * disassembler thinks the instructions as the same as we do. + */ + +// Uncomment "#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. +//#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS + +#if defined(DEBUG) +void CodeGen::genLoongArch64EmitterUnitTests() +{ + if (!verbose) + { + return; + } + + if (!compiler->opts.altJit) + { + // No point doing this in a "real" JIT. + return; + } + + // Mark the "fake" instructions in the output. + printf("*************** In genLoongArch64EmitterUnitTests()\n"); + + printf("*************** End of genLoongArch64EmitterUnitTests()\n"); +} +#endif // defined(DEBUG) + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. +// No probe is done. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. +// regTmp - an available temporary register that is used if 'spDelta' cannot be encoded by +// 'sub sp, sp, #spDelta' instruction. +// Can be REG_NA if the caller knows for certain that 'spDelta' fits into the immediate +// value range. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. 
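+    //
+    // If the adjustment fits in a signed 12-bit immediate, SP is adjusted with a single addi_d;
+    // otherwise the delta is first materialized into the reserved scratch register REG_R21 and
+    // added to SP with add_d.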
+ assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + if (emitter::isValidSimm12(spDelta)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + } + else + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, spDelta); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; +} + +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. 
+// +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. + assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + +#ifdef PROFILING_SUPPORTED + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. 
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: + if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + { + treeNode->gtOper = GT_CNS_DBL; + } + FALLTHROUGH; + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + + case GT_ADD: + case GT_SUB: + case GT_MUL: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + genCodeForShift(treeNode); + break; + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode->AsLclVarCommon()); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. + genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_INC_SATURATE: + genCodeForIncSaturate(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CMP: + if (treeNode->GetRegNum() != REG_NA) + { + genCodeForCompare(treeNode->AsOp()); + } + else if (!treeNode->gtNext) + { + genCodeForJumpTrue(treeNode->AsOp()); + } + else if (!treeNode->gtNext->OperIs(GT_JTRUE)) + { + GenTree* treeNode_next = treeNode->gtNext; + while (treeNode_next) + { + if (treeNode_next->OperIs(GT_JTRUE)) + { + break; + } + treeNode_next = treeNode_next->gtNext; + }; + assert(treeNode_next->OperIs(GT_JTRUE)); + // genCodeForJumpTrue(treeNode_next->AsOp()); + genCodeForCompare(treeNode_next->AsOp()); + } + break; + + case GT_JTRUE: + genCodeForJumpTrue(treeNode->AsOp()); + break; + + case GT_JCMP: + genCodeForJumpCompare(treeNode->AsOp()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call 
genConsumeReg() on the GT_COPY + break; + + case GT_FIELD_LIST: + // Should always be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + +#if FEATURE_ARG_SPLIT + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; +#endif // FEATURE_ARG_SPLIT + + case GT_CALL: + genCall(treeNode->AsCall()); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + if (treeNode->AsOp()->gtOp1->isContained()) + { + // For this case we simply need to update the lifetime of the local. + genUpdateLife(treeNode->AsOp()->gtOp1); + } + else + { + genConsumeReg(treeNode->AsOp()->gtOp1); + } + break; + + case GT_NO_OP: + instGen(INS_nop); + break; + + case GT_BOUNDS_CHECK: + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. */ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + +// the runtime side requires the codegen here to be consistent +#ifdef PSEUDORANDOM_NOP_INSERTION + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#else + NYI("unimplemented node"); +#endif + } + break; + } +} + +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. 
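+// The constant is materialized with emitIns_I_la, which may expand to more than one instruction
+// when the value does not fit in a single load-immediate encoding.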
+// +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert((type != TYP_REF) || (val == 0)); + + GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); + regSet.verifyRegUsed(reg); +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); + // initReg = #GlobalSecurityCookieVal; [frame.GSSecurityCookie] = initReg + genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + else + { + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(initReg); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + + *pInitRegZeroed = false; +} + +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). + if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + { + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + } + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. 
+    // We don't have any IR node representing this check, so LSRA can't communicate registers
+    // for us to use.
+
+    regNumber regGSConst = REG_GSCOOKIE_TMP_0;
+    regNumber regGSValue = REG_GSCOOKIE_TMP_1;
+
+    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+    {
+        // load the GS cookie constant into a reg
+        //
+        genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+    }
+    else
+    {
+        //// Ngen case - GS cookie constant needs to be accessed through an indirection.
+        // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+        // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0);
+        if (compiler->opts.compReloc)
+        {
+            GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst,
+                                       (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+        }
+        else
+        {
+            // TODO-LOONGARCH64: maybe optimize further!
+            // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst,
+            //                           (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+            // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, );
+            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst,
+                                      ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12);
+            GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst,
+                                      (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32);
+            GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst,
+                                        ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2);
+        }
+        regSet.verifyRegUsed(regGSConst);
+    }
+    // Load this method's GS value from the stack frame
+    GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+
+    // Compare with the GS cookie constant
+    BasicBlock* gsCheckBlk = genCreateTempLabel();
+    GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue);
+
+    // regGSConst and regGSValue aren't needed anymore; we can use them for the helper call
+    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
+    genDefineTempLabel(gsCheckBlk);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+//    treeNode - the GT_INTRINSIC node
+//
+// Return value:
+//    None
+//
+void CodeGen::genIntrinsic(GenTree* treeNode)
+{
+    NYI("unimplemented on LOONGARCH64 yet");
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for a GT_PUTARG_STK node
+//
+// Arguments
+//    treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+//    None
+//
+void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
+{
+    assert(treeNode->OperIs(GT_PUTARG_STK));
+    GenTree*  source     = treeNode->gtOp1;
+    var_types targetType = genActualType(source->TypeGet());
+    emitter*  emit       = GetEmitter();
+
+    // This is the varNum for our store operations,
+    // typically this is the varNum for the Outgoing arg space.
+    // When we are generating a tail call it will be the varNum for arg0.
+    unsigned varNumOut    = (unsigned)-1;
+    unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks
+
+    // Get argument offset to use with 'varNumOut'.
+    // Here we cross check that the argument offset hasn't changed from lowering to codegen since
+    // we are storing the arg slot number in the GT_PUTARG_STK node in the lowering phase.
+    unsigned argOffsetOut = treeNode->getArgOffset();
+
+#ifdef DEBUG
+    fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
+    assert(curArgTabEntry != nullptr);
+    DEBUG_ARG_SLOTS_ASSERT(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
+#endif // DEBUG
+
+    // Whether to set up the stack arg in the incoming or the outgoing arg area?
+    // Fast tail calls implemented as epilog+jmp: the stack arg is set up in the incoming arg area.
+    // All other calls: the stack arg is set up in the outgoing arg area.
+    if (treeNode->putInIncomingArgArea())
+    {
+        varNumOut    = getFirstArgWithStackSlot();
+        argOffsetMax = compiler->compArgSize;
+#if FEATURE_FASTTAILCALL
+        // This must be a fast tail call.
+        assert(treeNode->gtCall->IsFastTailCall());
+
+        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+        // because a fast tail call requires that the incoming arg area of the caller is >= the outgoing
+        // arg area required for the tail call.
+        LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
+        assert(varDsc != nullptr);
+#endif // FEATURE_FASTTAILCALL
+    }
+    else
+    {
+        varNumOut    = compiler->lvaOutgoingArgSpaceVar;
+        argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+    }
+
+    bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
+
+    if (!isStruct) // a normal non-Struct argument
+    {
+        if (varTypeIsSIMD(targetType))
+        {
+            NYI("unimplemented on LOONGARCH64 yet");
+        }
+
+        instruction storeIns  = ins_Store(targetType);
+        emitAttr    storeAttr = emitTypeSize(targetType);
+
+        // If it is contained then source must be the integer constant zero
+        if (source->isContained())
+        {
+            assert(source->OperGet() == GT_CNS_INT);
+            assert(source->AsIntConCommon()->IconValue() == 0);
+
+            emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut);
+        }
+        else
+        {
+            genConsumeReg(source);
+            if (storeIns == INS_st_w)
+            {
+                emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0);
+                storeIns  = INS_st_d;
+                storeAttr = EA_8BYTE;
+            }
+            emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut);
+        }
+        argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+    }
+    else // We have some kind of a struct argument
+    {
+        assert(source->isContained()); // We expect that this node was marked as contained in Lower
+
+        if (source->OperGet() == GT_FIELD_LIST)
+        {
+            genPutArgStkFieldList(treeNode, varNumOut);
+        }
+        else // We must have a GT_OBJ or a GT_LCL_VAR
+        {
+            noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
+
+            var_types targetType = source->TypeGet();
+            noway_assert(varTypeIsStruct(targetType));
+
+            // Setup loReg from the internal registers that we reserved in lower.
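+            // loReg is used below to copy the struct into the outgoing argument area one
+            // pointer-sized (or smaller) piece at a time.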
+            //
+            regNumber loReg   = treeNode->ExtractTempReg();
+            regNumber addrReg = REG_NA;
+
+            GenTreeLclVarCommon* varNode  = nullptr;
+            GenTree*             addrNode = nullptr;
+
+            if (source->OperGet() == GT_LCL_VAR)
+            {
+                varNode = source->AsLclVarCommon();
+            }
+            else // we must have a GT_OBJ
+            {
+                assert(source->OperGet() == GT_OBJ);
+
+                addrNode = source->AsOp()->gtOp1;
+
+                // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+                //
+                if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+                {
+                    // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+                    //
+                    // We will treat this case the same as above
+                    // (i.e. if we just had this GT_LCL_VAR directly as the source)
+                    // so update 'source' to point to this GT_LCL_VAR_ADDR node
+                    // and continue to the codegen for the LCL_VAR node below
+                    //
+                    varNode  = addrNode->AsLclVarCommon();
+                    addrNode = nullptr;
+                }
+                else // addrNode is used
+                {
+                    // Generate code to load the address that we need into a register
+                    genConsumeAddress(addrNode);
+                    addrReg = addrNode->GetRegNum();
+                }
+            }
+
+            // Either varNode or addrNode must have been set up above;
+            // the xor ensures that only one of the two is set up, not both
+            assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+            ClassLayout* layout;
+
+            // unsigned gcPtrCount; // The count of GC pointers in the struct
+            unsigned srcSize;
+
+            // gcPtrCount = treeNode->gtNumSlots;
+            // Setup the srcSize and layout
+            if (source->OperGet() == GT_LCL_VAR)
+            {
+                assert(varNode != nullptr);
+                LclVarDsc* varDsc = compiler->lvaGetDesc(varNode);
+
+                // This struct also must live in the stack frame
+                // And it can't live in a register (SIMD)
+                assert(varDsc->lvType == TYP_STRUCT);
+                assert(varDsc->lvOnFrame && !varDsc->lvRegister);
+
+                srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
+                                            // as that is how much stack is allocated for this LclVar
+                layout = varDsc->GetLayout();
+            }
+            else // we must have a GT_OBJ
+            {
+                assert(source->OperGet() == GT_OBJ);
+
+                // If the source is an OBJ node then we need to use the type information
+                // it provides (size and GC layout) even if the node wraps a lclvar. Due
+                // to struct reinterpretation (e.g. Unsafe.As) it is possible that
+                // the OBJ node has a different type than the lclvar.
+                CORINFO_CLASS_HANDLE objClass = source->AsObj()->GetLayout()->GetClassHandle();
+
+                srcSize = compiler->info.compCompHnd->getClassSize(objClass);
+                layout  = source->AsObj()->GetLayout();
+            }
+
+            unsigned structSize;
+
+            unsigned dstSize = treeNode->GetStackByteSize();
+            if (dstSize != srcSize)
+            {
+                // We can generate smaller code if the store size is a multiple of TARGET_POINTER_SIZE.
+                // The dst size can be rounded up to the PUTARG_STK size.
+                // The src size can be rounded up if it reads a local variable slot because the local
+                // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE.
+                // The exception is arm64 apple arguments because they can be passed without padding.
+                if (varNode != nullptr)
+                {
+                    // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size.
+                    const LclVarDsc* varDsc       = compiler->lvaGetDesc(varNode);
+                    const unsigned   varStackSize = varDsc->lvSize();
+                    if (varStackSize >= srcSize)
+                    {
+                        srcSize = varStackSize;
+                    }
+                }
+            }
+            if (dstSize == srcSize)
+            {
+                structSize = dstSize;
+            }
+            else
+            {
+                // With an Unsafe object we can have various strange combinations:
+                // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes;
+                // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local.
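+                // Copy only min(dstSize, srcSize) bytes so that the store never exceeds the outgoing argument slot.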
+ structSize = min(dstSize, srcSize); + } + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize > 0) + { + var_types type; + + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(nextIndex); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + // the left over size is smaller than a pointer and thus can never be a GC type + assert(!layout->IsGCPtr(nextIndex)); + + if (remainingSize == 1) + { + type = TYP_UBYTE; + } + else if (remainingSize == 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 4); + type = TYP_UINT; + } + } + const emitAttr attr = emitTypeSize(type); + const unsigned moveSize = genTypeSize(type); + assert(EA_SIZE_IN_BYTES(attr) == moveSize); + + remainingSize -= moveSize; + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + } + else + { + assert(loReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, loReg, addrReg, structOffset); + } + + // Emit a store instruction to store the register into the outgoing argument area + instruction storeIns = ins_Store(type); + emit->emitIns_S_R(storeIns, attr, loReg, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + + structOffset += moveSize; + nextIndex++; + } + } + } +} + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a GT_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + assert(targetType != TYP_STRUCT); + + GenTree* op1 = tree->gtOp1; + genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + else if (emitter::isFloatReg(targetReg)) + { + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } + else + { + assert(!emitter::isFloatReg(targetReg)); + GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } + } + genProduceReg(tree); +} + +#if FEATURE_ARG_SPLIT +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// tree - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTree* source = treeNode->gtOp1; + emitter* emit = GetEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + if (source->OperGet() == GT_FIELD_LIST) + { + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + unsigned regIndex = 0; + unsigned firstOnStackOffs = UINT_MAX; + + for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) + { + GenTree* nextArgNode = use.GetNode(); + regNumber fieldReg = nextArgNode->GetRegNum(); + 
genConsumeReg(nextArgNode); + + if (regIndex >= treeNode->gtNumRegs) + { + if (firstOnStackOffs == UINT_MAX) + { + firstOnStackOffs = use.GetOffset(); + } + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; + // We can't write beyond the outgoing arg area + assert(offset + EA_SIZE_IN_BYTES(attr) <= argOffsetMax); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); + } + else + { + var_types type = treeNode->GetRegType(regIndex); + regNumber argReg = treeNode->GetRegNumByIdx(regIndex); + + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + regIndex++; + } + } + } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_OBJ); + assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + assert(varNode->isContained()); + srcVarNum = varNode->GetLclNum(); + assert(srcVarNum < compiler->lvaCount); + + // handle promote situation + LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + // We don't split HFA struct + assert(!varDsc->lvIsHfa()); + } + else // addrNode is used + { + assert(addrNode != nullptr); + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + + // If addrReg equal to baseReg, we use the last target register as alternative baseReg. + // Because the candidate mask for the internal baseReg does not include any of the target register, + // we can ensure that baseReg, addrReg, and the last target register are not all same. 
+            assert(baseReg != addrReg);
+
+            // We don't split HFA struct
+            assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle()));
+        }
+
+        ClassLayout* layout = source->AsObj()->GetLayout();
+
+        // Put on stack first
+        unsigned nextIndex     = treeNode->gtNumRegs;
+        unsigned structOffset  = nextIndex * TARGET_POINTER_SIZE;
+        int      remainingSize = treeNode->GetStackByteSize();
+        unsigned argOffsetOut  = treeNode->getArgOffset();
+
+        // remainingSize is always multiple of TARGET_POINTER_SIZE
+        assert(remainingSize % TARGET_POINTER_SIZE == 0);
+        while (remainingSize > 0)
+        {
+            var_types type = layout->GetGCPtrType(nextIndex);
+
+            if (varNode != nullptr)
+            {
+                // Load from our varNumImp source
+                emit->emitIns_R_S(INS_ld_d, emitTypeSize(type), baseReg, srcVarNum, structOffset);
+            }
+            else
+            {
+                // check for case of destroying the addrRegister while we still need it
+                assert(baseReg != addrReg);
+
+                // Load from our address expression source
+                emit->emitIns_R_R_I(INS_ld_d, emitTypeSize(type), baseReg, addrReg, structOffset);
+            }
+
+            // Emit a store instruction to store the register into the outgoing argument area
+            emit->emitIns_S_R(INS_st_d, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
+
+            argOffsetOut += TARGET_POINTER_SIZE;  // We stored TARGET_POINTER_SIZE bytes of the struct
+            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+            remainingSize -= TARGET_POINTER_SIZE; // We loaded TARGET_POINTER_SIZE bytes of the struct
+            structOffset += TARGET_POINTER_SIZE;
+            nextIndex += 1;
+        }
+
+        // We set up the registers in order, so that `baseReg` is no longer in use by the time we assign
+        // the last target register, in case we had to reuse the last target register for it.
+        structOffset = 0;
+        for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
+        {
+            regNumber targetReg = treeNode->GetRegNumByIdx(idx);
+            var_types type      = treeNode->GetRegType(idx);
+
+            if (varNode != nullptr)
+            {
+                // Load from our varNumImp source
+                emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset);
+            }
+            else
+            {
+                // check for case of destroying the addrRegister while we still need it
+                if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
+                {
+                    assert(targetReg != baseReg);
+                    emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0);
+                    addrReg = baseReg;
+                }
+
+                // Load from our address expression source
+                emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset);
+            }
+            structOffset += TARGET_POINTER_SIZE;
+        }
+    }
+    genProduceReg(treeNode);
+}
+#endif // FEATURE_ARG_SPLIT
+
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+//    treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//    The child of store is a multi-reg call node.
+//    genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
+{
+    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+    // Structs of size >=9 and <=16, as well as HFAs, are returned in two return registers on LOONGARCH64.
+    assert(varTypeIsStruct(treeNode));
+
+    // Assumption: current implementation requires that a multi-reg
+    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+    // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + noway_assert(varDsc->lvIsMultiRegRet); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); + GenTreeCall* call = actualOp1->AsCall(); + assert(call->HasMultiRegRetVal()); + + genConsumeRegs(op1); + + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + if (treeNode->GetRegNum() != REG_NA) + { + NYI("unimplemented on LOONGARCH64 yet"); + // Right now the only enregistrable multi-reg return types supported are SIMD types. + assert(varTypeIsSIMD(treeNode)); + assert(regCount != 0); + + regNumber dst = treeNode->GetRegNum(); + + // Treat dst register as a homogenous vector with element size equal to the src size + // Insert pieces in reverse order + for (int i = regCount - 1; i >= 0; --i) + { + var_types type = pRetTypeDesc->GetReturnRegType(i); + regNumber reg = call->GetRegNumByIdx(i); + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } + + assert(reg != REG_NA); + if (varTypeIsFloating(type)) + { + // If the register piece was passed in a floating point register + // Use a vector mov element instruction + // src is not a vector, so it is in the first element reg[0] + // mov dst[i], reg[0] + // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further + // iterations + // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for + // a temporary + GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0); + } + else + { + // If the register piece was passed in an integer register + // Use a vector mov from general purpose register instruction + // mov dst[i], reg + // This effectively moves from `reg` to `dst[i]` + GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i); + } + } + + genProduceReg(treeNode); + } + else + { + // Stack store + int offset = 0; + var_types type = pRetTypeDesc->GetReturnRegType(0); + regNumber reg = call->GetRegNumByIdx(0); + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } + + assert(reg != REG_NA); + GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + + if (1 < regCount) + { + offset = genTypeSize(type); + type = pRetTypeDesc->GetReturnRegType(1); + reg = call->GetRegNumByIdx(1); + offset = (offset < (int)genTypeSize(type)) ? genTypeSize(type) : offset; + GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + } + + genUpdateLife(treeNode); + varDsc->SetRegNum(REG_STK); + } +} + +//------------------------------------------------------------------------ +// genRangeCheck: generate code for GT_BOUNDS_CHECK node. 
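+//
+// Arguments:
+//    oper - the GT_BOUNDS_CHECK node to generate code for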
+// +void CodeGen::genRangeCheck(GenTree* oper) +{ + noway_assert(oper->OperIs(GT_BOUNDS_CHECK)); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTree* arrLen = bndsChk->GetArrayLength(); + GenTree* arrIndex = bndsChk->GetIndex(); + GenTree* arrRef = NULL; + int lenOffset = 0; + + GenTree* src1; + GenTree* src2; + regNumber reg1; + regNumber reg2; + emitJumpKind jmpKind = EJ_jmp; + + genConsumeRegs(arrIndex); + genConsumeRegs(arrLen); + + emitter* emit = GetEmitter(); + GenTreeIntConCommon* intConst = nullptr; + if (arrIndex->isContainedIntOrIImmed()) + { + src1 = arrLen; + src2 = arrIndex; + reg1 = REG_R21; + reg2 = src1->GetRegNum(); + + intConst = src2->AsIntConCommon(); + ssize_t imm = intConst->IconValue(); + if (imm == INT64_MAX) + { + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); + emit->emitIns_R_R_I(INS_srli_d, EA_PTRSIZE, REG_R21, REG_R21, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + } + } + else + { + src1 = arrIndex; + src2 = arrLen; + reg1 = src1->GetRegNum(); + + if (src2->isContainedIntOrIImmed()) + { + reg2 = REG_R21; + ssize_t imm = src2->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + } + else + { + reg2 = src2->GetRegNum(); + } + } + +#ifdef DEBUG + var_types bndsChkType = genActualType(src2->TypeGet()); + var_types src1ChkType = genActualType(src1->TypeGet()); + // Bounds checks can only be 32 or 64 bit sized comparisons. + assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); + assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG); +#endif // DEBUG + + genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2); +} + +//--------------------------------------------------------------------- +// genCodeForPhysReg - generate code for a GT_PHYSREG node +// +// Arguments +// tree - the GT_PHYSREG node +// +// Return value: +// None +// +void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) +{ + assert(tree->OperIs(GT_PHYSREG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + if (targetReg != tree->gtSrcReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); + genTransferRegGCState(targetReg, tree->gtSrcReg); + } + + genProduceReg(tree); +} + +//--------------------------------------------------------------------- +// genCodeForNullCheck - generate code for a GT_NULLCHECK node +// +// Arguments +// tree - the GT_NULLCHECK node +// +// Return value: +// None +// +void CodeGen::genCodeForNullCheck(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_NULLCHECK)); + assert(!tree->gtOp1->isContained()); + regNumber addrReg = genConsumeReg(tree->gtOp1); + + regNumber targetReg = REG_R0; + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, targetReg, addrReg, 0); +} + +//------------------------------------------------------------------------ +// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, +// producing the effective index by subtracting the lower bound. +// +// Arguments: +// arrIndex - the node for which we're generating code +// +// Return Value: +// None. 
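+//
+// Notes:
+//    REG_R21 is used as the scratch register for loading the lower bound and the dimension size,
+//    so the target register must not be REG_R21.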
+// +void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) +{ + emitter* emit = GetEmitter(); + GenTree* arrObj = arrIndex->ArrObj(); + GenTree* indexNode = arrIndex->IndexExpr(); + regNumber arrReg = genConsumeReg(arrObj); + regNumber indexReg = genConsumeReg(indexNode); + regNumber tgtReg = arrIndex->GetRegNum(); + noway_assert(tgtReg != REG_NA); + + // We will use a temp register to load the lower bound and dimension size values. + + // regNumber tmpReg = arrIndex->GetSingleTempReg(); + assert(tgtReg != REG_R21); + + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + unsigned offset; + + offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim); + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + emit->emitIns_R_R_R(INS_sub_w, EA_4BYTE, tgtReg, indexReg, REG_R21); + + offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, tgtReg, nullptr, REG_R21); + + genProduceReg(arrIndex); +} + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTree* offsetNode = arrOffset->gtOffset; + GenTree* indexNode = arrOffset->gtIndex; + regNumber tgtReg = arrOffset->GetRegNum(); + + noway_assert(tgtReg != REG_NA); + + if (!offsetNode->IsIntegralConst(0)) + { + emitter* emit = GetEmitter(); + regNumber offsetReg = genConsumeReg(offsetNode); + regNumber indexReg = genConsumeReg(indexNode); + regNumber arrReg = genConsumeReg(arrOffset->gtArrObj); + noway_assert(offsetReg != REG_NA); + noway_assert(indexReg != REG_NA); + noway_assert(arrReg != REG_NA); + + // regNumber tmpReg = arrOffset->GetSingleTempReg(); + + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + + // Load tmpReg with the dimension size and evaluate + // tgtReg = offsetReg*tmpReg + indexReg. + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + emit->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, REG_R21, offsetReg); + emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tgtReg, REG_R21, indexReg); + } + else + { + regNumber indexReg = genConsumeReg(indexNode); + if (indexReg != tgtReg) + { + GetEmitter()->emitIns_R_R_I(INS_ori, emitActualTypeSize(TYP_INT), tgtReg, indexReg, 0); + } + } + genProduceReg(arrOffset); +} + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. 
+// +void CodeGen::genCodeForShift(GenTree* tree) +{ + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); + + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTree* operand = tree->gtGetOp1(); + GenTree* shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); + } + else + { + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + + // should check shiftByImm for loongarch32-ins. + unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 + shiftByImm &= (immWidth - 1); + + if (ins == INS_slli_w && shiftByImm >= 32) + { + ins = INS_slli_d; + } + else if (ins == INS_slli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_slli_d; + } + else if (ins == INS_srai_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srai_d; + } + else if (ins == INS_srli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srli_d; + } + else if (ins == INS_rotri_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_rotri_d; + } + + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. +// +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) +{ + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); + + // Address of a local var. + noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); + + genProduceReg(lclAddrNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); + assert(targetReg != REG_NA); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. +// +// Arguments: +// tree - the GT_INDEX_ADDR node +// +void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) +{ + GenTree* const base = node->Arr(); + GenTree* const index = node->Index(); + + genConsumeReg(base); + genConsumeReg(index); + + // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the + // base register is multiply-used. 
As such, we need to mark the base register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(base->GetRegNum(), base->TypeGet()); + assert(!varTypeIsGC(index->TypeGet())); + + // The index is never contained, even if it is a constant. + assert(index->isUsedFromReg()); + + // Generate the bounds check if necessary. + if ((node->gtFlags & GTF_INX_RNGCHK) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } + // + // sltu REG_R21, index, REG_R21 + // bne REG_R21, zero, RngChkExit + // IndRngFail: + // ... + // RngChkExit: + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); + } + + emitAttr attr = emitActualTypeSize(node); + // Can we use a shift instruction for multiply ? + // + if (isPow2(node->gtElemSize) && (node->gtElemSize < 0x10000000u)) + { + regNumber tmpReg; + if (node->gtElemSize == 0) + { + // dest = base + index + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); + + // tmpReg = base + index << scale + // dest = base + tmpReg + GetEmitter()->emitIns_R_R_I(INS_slli_d, attr, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), base->GetRegNum(), tmpReg); + } + else // we have to load the element size and use a MADD (multiply-add) instruction + { + // REG_R21 = element size + CodeGen::genSetRegToIcon(REG_R21, (ssize_t)node->gtElemSize, TYP_INT); + + // dest = index * REG_R21 + base + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_mul_w, EA_4BYTE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_w, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + } + + // dest = dest + elemOffs + GetEmitter()->emitIns_R_R_I(INS_addi_d, attr, node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); + + gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForIndir: Produce code for a GT_IND node. +// +// Arguments: +// tree - the GT_IND node +// +void CodeGen::genCodeForIndir(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_IND)); + +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. 
+    if (tree->TypeGet() == TYP_SIMD12)
+    {
+        genLoadIndTypeSIMD12(tree);
+        return;
+    }
+#endif // FEATURE_SIMD
+
+    var_types   type      = tree->TypeGet();
+    instruction ins       = ins_Load(type);
+    instruction ins2      = INS_none;
+    regNumber   targetReg = tree->GetRegNum();
+    regNumber   tmpReg    = targetReg;
+    emitAttr    attr      = emitActualTypeSize(type);
+    int         offset    = 0;
+
+    genConsumeAddress(tree->Addr());
+
+    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+    {
+        instGen_MemoryBarrier(BARRIER_FULL);
+    }
+
+    GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree);
+
+    genProduceReg(tree);
+}
+
+//----------------------------------------------------------------------------------
+// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
+//
+// Arguments:
+//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
+//
+// Preconditions:
+//   The register assignments have been set appropriately.
+//   This is validated by genConsumeBlockOp().
+//
+void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
+{
+    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
+    // genConsumeBlockOp takes care of this for us.
+    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier before a volatile CpBlk operation
+        instGen_MemoryBarrier();
+    }
+
+    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+
+    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier after a volatile CpBlk operation
+        instGen_MemoryBarrier(BARRIER_FULL);
+    }
+}
+
+//----------------------------------------------------------------------------------
+// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
+//
+// Arguments:
+//    cpBlkNode  -  Copy block node
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//  The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
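+//
+// Notes:
+//    A rough sketch of the unrolled pattern (not the verbatim emitted sequence): while at
+//    least 16 bytes remain, the copy uses paired 8-byte ld.d/st.d through the node's temp
+//    register and REG_R21; any remaining tail is copied with progressively narrower
+//    ld.d/ld.w/ld.h/ld.b and the matching stores.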
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+    assert(cpBlkNode->OperIs(GT_STORE_BLK));
+
+    unsigned  dstLclNum      = BAD_VAR_NUM;
+    regNumber dstAddrBaseReg = REG_NA;
+    int       dstOffset      = 0;
+    GenTree*  dstAddr        = cpBlkNode->Addr();
+
+    if (!dstAddr->isContained())
+    {
+        dstAddrBaseReg = genConsumeReg(dstAddr);
+    }
+    else if (dstAddr->OperIsAddrMode())
+    {
+        assert(!dstAddr->AsAddrMode()->HasIndex());
+
+        dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base());
+        dstOffset      = dstAddr->AsAddrMode()->Offset();
+    }
+    else
+    {
+        assert(dstAddr->OperIsLocalAddr());
+        dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
+        dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs();
+    }
+
+    unsigned  srcLclNum      = BAD_VAR_NUM;
+    regNumber srcAddrBaseReg = REG_NA;
+    int       srcOffset      = 0;
+    GenTree*  src            = cpBlkNode->Data();
+
+    assert(src->isContained());
+
+    if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+    {
+        srcLclNum = src->AsLclVarCommon()->GetLclNum();
+        srcOffset = src->AsLclVarCommon()->GetLclOffs();
+    }
+    else
+    {
+        assert(src->OperIs(GT_IND));
+        GenTree* srcAddr = src->AsIndir()->Addr();
+
+        if (!srcAddr->isContained())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr);
+        }
+        else if (srcAddr->OperIsAddrMode())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base());
+            srcOffset      = srcAddr->AsAddrMode()->Offset();
+        }
+        else
+        {
+            assert(srcAddr->OperIsLocalAddr());
+            srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
+            srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs();
+        }
+    }
+
+    if (cpBlkNode->IsVolatile())
+    {
+        // issue a full memory barrier before a volatile CpBlk operation
+        instGen_MemoryBarrier();
+    }
+
+    emitter* emit = GetEmitter();
+    unsigned size = cpBlkNode->GetLayout()->GetSize();
+
+    assert(size <= INT32_MAX);
+    assert(srcOffset < INT32_MAX - static_cast<int>(size));
+    assert(dstOffset < INT32_MAX - static_cast<int>(size));
+
+    regNumber tempReg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
+
+    if (size >= 2 * REGSIZE_BYTES)
+    {
+        regNumber tempReg2 = REG_R21;
+
+        for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize;
+             size -= regSize, srcOffset += regSize, dstOffset += regSize)
+        {
+            if (srcLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg, srcLclNum, srcOffset);
+                emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg2, srcLclNum, srcOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg, srcAddrBaseReg, srcOffset);
+                emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg2, srcAddrBaseReg, srcOffset + 8);
+            }
+
+            if (dstLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg, dstLclNum, dstOffset);
+                emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg2, dstLclNum, dstOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg, dstAddrBaseReg, dstOffset);
+                emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg2, dstAddrBaseReg, dstOffset + 8);
+            }
+        }
+    }
+
+    for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize)
+    {
+        while (regSize > size)
+        {
+            regSize /= 2;
+        }
+
+        instruction loadIns;
+        instruction storeIns;
+        emitAttr    attr;
+
+        switch (regSize)
+        {
+            case 1:
+                loadIns  = INS_ld_b;
+                storeIns = INS_st_b;
+                attr     = EA_4BYTE;
+                break;
+            case 2:
+                loadIns  = INS_ld_h;
+                storeIns = INS_st_h;
+                attr     = EA_4BYTE;
+                break;
+            case 4:
+                loadIns  = INS_ld_w;
+                storeIns = INS_st_w;
+                attr     = EA_ATTR(regSize);
+                break;
+            case 8:
+                loadIns  = INS_ld_d;
+                storeIns = INS_st_d;
+                attr     = EA_ATTR(regSize);
+                break;
+            default:
+                unreached();
+        }
+
+        if (srcLclNum != BAD_VAR_NUM)
+        {
+            emit->emitIns_R_S(loadIns, attr, tempReg, srcLclNum, srcOffset);
+        }
+        else
+        {
+            emit->emitIns_R_R_I(loadIns, attr, tempReg, srcAddrBaseReg, srcOffset);
+        }
+
+        if (dstLclNum != BAD_VAR_NUM)
+        {
+            emit->emitIns_S_R(storeIns, attr, tempReg, dstLclNum, dstOffset);
+        }
+        else
+        {
+            emit->emitIns_R_R_I(storeIns, attr, tempReg, dstAddrBaseReg, dstOffset);
+        }
+    }
+
+    if (cpBlkNode->IsVolatile())
+    {
+        // issue a load barrier after a volatile CpBlk operation
+        instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
+    }
+}
+
+//------------------------------------------------------------------------
+// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memset helper call
+//
+// Arguments:
+//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
+//
+// Preconditions:
+//   The register assignments have been set appropriately.
+//   This is validated by genConsumeBlockOp().
+//
+void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
+{
+    // Destination address goes in arg0, fill value goes in arg1, and size goes in arg2.
+    // genConsumeBlockOp takes care of this for us.
+    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier before a volatile initBlock operation
+        instGen_MemoryBarrier();
+    }
+
+    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+// Generate code for a load from some address + offset
+//   base: tree node which can be either a local address or arbitrary node
+//   offset: distance from the base from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+{
+    emitter* emit = GetEmitter();
+
+    if (base->OperIsLocalAddr())
+    {
+        if (base->gtOper == GT_LCL_FLD_ADDR)
+        {
+            offset += base->AsLclFld()->GetLclOffs();
+        }
+        emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset);
+    }
+    else
+    {
+        emit->emitIns_R_R_I(ins, size, dst, base->GetRegNum(), offset);
+    }
+}
+
+//------------------------------------------------------------------------
+// genCall: Produce code for a GT_CALL node
+//
+void CodeGen::genCall(GenTreeCall* call)
+{
+    // Consume all the arg regs
+    for (GenTreeCall::Use& use : call->LateArgs())
+    {
+        GenTree* argNode = use.GetNode();
+
+        fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+        assert(curArgTabEntry);
+
+        // GT_RELOAD/GT_COPY use the child node
+        argNode = argNode->gtSkipReloadOrCopy();
+
+        if (curArgTabEntry->GetRegNum() == REG_STK)
+        {
+            continue;
+        }
+
+        // Deal with multi register passed struct args.
+        if (argNode->OperGet() == GT_FIELD_LIST)
+        {
+            regNumber argReg = curArgTabEntry->GetRegNum();
+            for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses())
+            {
+                GenTree* putArgRegNode = use.GetNode();
+                assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+                genConsumeReg(putArgRegNode);
+                var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL;
+                inst_Mov(dstType, argReg, putArgRegNode->GetRegNum(), /* canSkip */ true);
+
+                argReg = genRegArgNext(argReg);
+            }
+        }
+        else if (curArgTabEntry->IsSplit())
+        {
+            NYI("unimplemented on LOONGARCH64 yet");
+        }
+        else
+        {
+            regNumber argReg = curArgTabEntry->GetRegNum();
+            genConsumeReg(argNode);
+            var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL;
+            inst_Mov(dstType, argReg, argNode->GetRegNum(), /* canSkip */ true);
+        }
+    }
+
+    // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0); + } + + // If fast tail call, then we are done here, we just have to load the call + // target into the right registers. We ensure in RA that target is loaded + // into a volatile register that won't be restored by epilog sequence. + if (call->IsFastTailCall()) + { + GenTree* target = getCallTarget(call, nullptr); + + if (target != nullptr) + { + // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. + genConsumeReg(target); + } +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); + assert(call->gtControlExpr == nullptr); + + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); + } +#endif + + return; + } + + // For a pinvoke to unmanaged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (compiler->killGCRefs(call)) + { + genDefineTempLabel(genCreateTempLabel()); + } + + genCallInstruction(call); + + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#ifdef DEBUG + // We should not have GC pointers in killed registers live around the call. + // GC info for arg registers were cleared when consuming arg nodes above + // and LSRA should ensure it for other trashed registers. + regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. 
+ for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); + } + } + else + { + if (varTypeUsesFloatArgReg(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +//------------------------------------------------------------------------ +// genCallInstruction - Generate instructions necessary to transfer control to the call. +// +// Arguments: +// call - the GT_CALL node +// +// Remaks: +// For tailcalls this function will generate a jump. +// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + // Determine return value size(s). + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + } + else + { + assert(call->gtType != TYP_STRUCT); + + if (call->gtType == TYP_REF) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + DebugInfo di; + // We need to propagate the debug information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. + if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (call->gtCallType != CT_HELPER) + { + sigInfo = call->callSig; + } + + if (call->IsFastTailCall()) + { + regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + + // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no + // non-standard args that may be trash if this is a tailcall. 
+ if (compiler->getNeedsGSSecurityCookie()) + { + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + } + + for (unsigned i = 0; i < call->fgArgInfo->ArgCount(); i++) + { + fgArgTabEntry* entry = call->fgArgInfo->GetArgEntry(i); + for (unsigned j = 0; j < entry->numRegs; j++) + { + regNumber reg = entry->GetRegNum(j); + if ((trashedByEpilog & genRegMask(reg)) != 0) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + } + } + } +#endif // DEBUG + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = getCallTarget(call, &methHnd); + + if (target != nullptr) + { + // A call target can not be a contained indirection + assert(!target->isContainedIndir()); + + // For fast tailcall we have already consumed the target. We ensure in + // RA that the target was allocated into a volatile register that will + // not be messed up by epilog sequence. + if (!call->IsFastTailCall()) + { + genConsumeReg(target); + } + + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + // + assert(genIsValidIntReg(target->GetRegNum())); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + target->GetRegNum(), + call->IsFastTailCall()); + // clang-format on + } + else + { + // If we have no target and this is a call with indirection cell then + // we do an optimization where we load the call address directly from + // the indirection cell instead of duplicating the tree. In BuildCall + // we ensure that get an extra register for the purpose. Note that for + // CFG the call might have changed to + // CORINFO_HELP_DISPATCH_INDIRECT_CALL in which case we still have the + // indirection cell but we should not try to optimize. + regNumber callThroughIndirReg = REG_NA; + if (!call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)) + { + callThroughIndirReg = getCallIndirectionCellReg(call); + } + + if (callThroughIndirReg != REG_NA) + { + assert(call->IsR2ROrVirtualStubRelativeIndir()); + regNumber targetAddrReg = call->GetSingleTempReg(); + // For fast tailcalls we have already loaded the call target when processing the call node. + if (!call->IsFastTailCall()) + { + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), targetAddrReg, + callThroughIndirReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((targetAddrReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == targetAddrReg); + } + + // We have now generated code loading the target address from the indirection cell into `targetAddrReg`. + // We just need to emit "bl targetAddrReg" in this case. 
+ // + assert(genIsValidIntReg(targetAddrReg)); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + targetAddrReg, + call->IsFastTailCall()); + // clang-format on + } + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(call->gtCallType == CT_HELPER || call->gtCallType == CT_USER_FUNC); + + void* addr = nullptr; +#ifdef FEATURE_READYTORUN + if (call->gtEntryPoint.addr != NULL) + { + assert(call->gtEntryPoint.accessType == IAT_VALUE); + addr = call->gtEntryPoint.addr; + } + else +#endif // FEATURE_READYTORUN + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else + { + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; + } + + assert(addr != nullptr); + + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } + } +} + +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. +void CodeGen::genJmpMethod(GenTree* jmp) +{ + assert(jmp->OperGet() == GT_JMP); + assert(compiler->compJmpOpUsed); + + // If no arguments, nothing to do + if (compiler->info.compArgsCount == 0) + { + return; + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + unsigned varNum; + LclVarDsc* varDsc; + + // First move any en-registered stack arguments back to the stack. + // At the same time any reg arg not in correct reg is moved back to its stack location. + // + // We are not strictly required to spill reg args that are not in the desired reg for a jmp call + // But that would require us to deal with circularity while moving values around. Spilling + // to stack makes the implementation simple, which is not a bad trade off given Jmp calls + // are not frequent. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK)) + { + // Skip reg args which are already in its right register for jmp call. + // If not, we will spill such args to their stack locations. + // + // If we need to generate a tail call profiler hook, then spill all + // arg regs to free them up for the callback. + if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + { + continue; + } + } + else if (varDsc->GetRegNum() == REG_STK) + { + // Skip args which are currently living in stack. + continue; + } + + // If we came here it means either a reg argument not in the right register or + // a stack argument currently living in a register. 
In either case the following + // assert should hold. + assert(varDsc->GetRegNum() != REG_STK); + assert(varDsc->TypeGet() != TYP_STRUCT); + var_types storeType = varDsc->GetStackSlotHomeType(); + emitAttr storeSize = emitActualTypeSize(storeType); + + GetEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->GetRegNum(), varNum, 0); + // Update GetRegNum() life and GC info to indicate GetRegNum() is dead and varDsc stack slot is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. + // Therefore manually update life of varDsc->GetRegNum(). + regMaskTP tempMask = genRegMask(varDsc->GetRegNum()); + regSet.RemoveMaskVars(tempMask); + gcInfo.gcMarkRegSetNpt(tempMask); + if (compiler->lvaIsGCTracked(varDsc)) + { + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + } + } + +#ifdef PROFILING_SUPPORTED + // At this point all arg regs are free. + // Emit tail call profiler callback. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif + + // Next move any un-enregistered register arguments back to their register. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + // Skip if arg not passed in a register. + if (!varDsc->lvIsRegArg) + { + continue; + } + + // Register argument + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + + // Is register argument already in the right register? + // If not load it from its stack location. + regNumber argReg = varDsc->GetArgReg(); // incoming arg register + regNumber argRegNext = REG_NA; + + if (varDsc->GetRegNum() != argReg) + { + var_types loadType = TYP_UNDEF; + + // NOTE for LOONGARCH: not supports the HFA. + assert(!varDsc->lvIsHfaRegArg()); + { + if (varTypeIsStruct(varDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= 16); + if (emitter::isFloatReg(argReg)) + { + loadType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + } + else + loadType = varDsc->GetLayout()->GetGCPtrType(0); + } + else + { + loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + } + + emitAttr loadSize = emitActualTypeSize(loadType); + GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting + // it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.AddMaskVars(genRegMask(argReg)); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + + if (varDsc->GetOtherArgReg() < REG_STK) + { + // Restore the second register. + argRegNext = varDsc->GetOtherArgReg(); + + if (emitter::isFloatReg(argRegNext)) + { + loadType = varDsc->lvIs4Field2 ? 
TYP_FLOAT : TYP_DOUBLE; + } + else + loadType = varDsc->GetLayout()->GetGCPtrType(1); + + loadSize = emitActualTypeSize(loadType); + int offs = loadSize == EA_4BYTE ? 4 : 8; + GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, offs); + + regSet.AddMaskVars(genRegMask(argRegNext)); + gcInfo.gcMarkRegPtrVal(argRegNext, loadType); + } + + if (compiler->lvaIsGCTracked(varDsc)) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + } + + if (compiler->info.compIsVarArgs) + { + NYI_LOONGARCH64("genJmpMethod unsupports compIsVarArgs"); + } + } +} + +//------------------------------------------------------------------------ +// genIntCastOverflowCheck: Generate overflow checking code for an integer cast. +// +// Arguments: +// cast - The GT_CAST node +// desc - The cast description +// reg - The register containing the value to check +// +void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) +{ + switch (desc.CheckKind()) + { + case GenIntCastDesc::CHECK_POSITIVE: + { + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0); + } + break; + + case GenIntCastDesc::CHECK_UINT_RANGE: + { + // We need to check if the value is not greater than 0xFFFFFFFF + // if the upper 32 bits are zero. + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 32); + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + break; + + case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE: + { + // We need to check if the value is not greater than 0x7FFFFFFF + // if the upper 33 bits are zero. + // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 31); + + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + break; + + case GenIntCastDesc::CHECK_INT_RANGE: + { + const regNumber tempReg = cast->GetSingleTempReg(); + assert(tempReg != reg); + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg); + + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg); + } + break; + + default: + { + assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); + instruction ins; + + if (castMaxValue > 2047) + { + assert((castMaxValue == 32767) || (castMaxValue == 65535)); + GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); + ins = castMinValue == 0 ? INS_bgeu : INS_bge; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); + ins = castMinValue == 0 ? 
INS_bltu : INS_blt; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); + } + + if (castMinValue != 0) + { + if (emitter::isValidSimm12(castMinValue)) + { + GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); + } + else + { + GetEmitter()->emitIns_I_la(EA_8BYTE, REG_R21, castMinValue); + GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, REG_R21); + } + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + } + break; + } +} + +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for an integer cast, with or without overflow check. +// +// Arguments: +// cast - The GT_CAST node +// +// Assumptions: +// The cast node is not a contained node and must have an assigned register. +// Neither the source nor target type can be a floating point type. +// +// TODO-LOONGARCH64-CQ: Allow castOp to be a contained node without an assigned register. +// +void CodeGen::genIntToIntCast(GenTreeCast* cast) +{ + genConsumeRegs(cast->gtGetOp1()); + + emitter* emit = GetEmitter(); + var_types dstType = cast->CastToType(); + var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + const unsigned char pos = 0; + const unsigned char size = 32; + + assert(genIsValidIntReg(srcReg)); + assert(genIsValidIntReg(dstReg)); + + GenIntCastDesc desc(cast); + + if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) + { + genIntCastOverflowCheck(cast, desc, srcReg); + } + + if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + { + instruction ins; + + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + if (desc.ExtendSrcSize() == 1) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos); + } + else + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos); + } + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h; + emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg); + break; + + case GenIntCastDesc::ZERO_EXTEND_INT: + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + break; + + default: + assert(desc.ExtendKind() == GenIntCastDesc::COPY); + if (srcType == TYP_INT) + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + } + else + { + emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); + } + break; + } + } + + genProduceReg(cast); +} + +//------------------------------------------------------------------------ +// genFloatToFloatCast: Generate code for a cast between float and double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// The cast is between float and double. 
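+//
+// Notes:
+//    A sketch of the emitted code: a float-to-double widening uses fcvt.d.s, a double-to-float
+//    narrowing uses fcvt.s.d, and a same-type "cast" whose source and destination registers
+//    differ is emitted as a register-to-register move (fmov.s or fmov.d).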
+// +void CodeGen::genFloatToFloatCast(GenTree* treeNode) +{ + // float <--> double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + genConsumeOperands(treeNode->AsOp()); + + // treeNode must be a reg + assert(!treeNode->isContained()); + + if (srcType != dstType) + { + instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s // convert Single to Double + : INS_fcvt_s_d; // convert Double to Single + + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); + } + else if (treeNode->GetRegNum() != op1->GetRegNum()) + { + // If double to double cast or float to float cast. Emit a move instruction. + instruction ins = (srcType == TYP_FLOAT) ? INS_fmov_s : INS_fmov_d; + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCreateAndStoreGCInfo: Create and record GC Info for the function. +// +void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder != nullptr); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // We keep the call count for the second call to gcMakeRegPtrTable() below. + unsigned callCnt = 0; + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); + + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + + // Now we can actually use those slot ID's to declare live ranges. 
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { + preservedAreaSize += REGSIZE_BYTES; + } + + preservedAreaSize += 1; // bool for synchronized methods + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } + + if (compiler->opts.IsReversePInvoke()) + { + unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; + assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); + gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); + } + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} + +//------------------------------------------------------------------------ +// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) +{ + assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK)); + + if (blkOp->OperIs(GT_STORE_OBJ)) + { + assert(!blkOp->gtBlkOpGcUnsafe); + assert(blkOp->OperIsCopyBlkOp()); + assert(blkOp->AsObj()->GetLayout()->HasGCPtr()); + genCodeForCpObj(blkOp->AsObj()); + return; + } + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitDisableGC(); + } + bool isCopyBlk = blkOp->OperIsCopyBlkOp(); + + switch (blkOp->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindHelper: + if (isCopyBlk) + { + genCodeForCpBlkHelper(blkOp); + } + else + { + genCodeForInitBlkHelper(blkOp); + } + break; + + case GenTreeBlk::BlkOpKindUnroll: + if (isCopyBlk) + { + genCodeForCpBlkUnroll(blkOp); + } + else + { + genCodeForInitBlkUnroll(blkOp); + } + break; + + default: + unreached(); + } + + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitEnableGC(); + } +} + +//------------------------------------------------------------------------ +// genLeaInstruction: Produce code for a GT_LEA node. +// +// Arguments: +// lea - the node +// +void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) +{ + genConsumeOperands(lea); + emitter* emit = GetEmitter(); + emitAttr size = emitTypeSize(lea); + int offset = lea->Offset(); + + // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate: + // tmpReg = indexReg << scale; + // destReg = baseReg + tmpReg; + // destReg = destReg + offset; + // + // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture + // addressing mode instruction. 
Currently we're 'cheating' by producing one or more + // instructions to generate the addressing mode so we need to modify lowering to + // produce LEAs that are a 1:1 relationship to the LOONGARCH64 architecture. + if (lea->Base() && lea->Index()) + { + GenTree* memBase = lea->Base(); + GenTree* index = lea->Index(); + + assert(isPow2(lea->gtScale)); + + regNumber tmpReg; + if (lea->gtScale == 0) + { + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, lea->gtScale); + assert(scale <= 4); + + emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + + if (offset != 0) + { + if (emitter::isValidSimm12(offset)) + { + emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); + } + else + { + regNumber tmpReg2 = lea->GetSingleTempReg(); + + noway_assert(tmpReg2 != index->GetRegNum()); + noway_assert(tmpReg2 != memBase->GetRegNum()); + noway_assert(tmpReg2 != tmpReg); + + // compute the large offset. + emit->emitIns_I_la(EA_PTRSIZE, tmpReg2, offset); + emit->emitIns_R_R_R(INS_add_d, size, tmpReg, tmpReg, tmpReg2); + } + } + + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + } + else if (lea->Base()) + { + GenTree* memBase = lea->Base(); + + if (emitter::isValidSimm12(offset)) + { + if (offset != 0) + { + // Then compute target reg from [memBase + offset] + emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), memBase->GetRegNum(), offset); + } + else // offset is zero + { + if (lea->GetRegNum() != memBase->GetRegNum()) + { + emit->emitIns_R_R_I(INS_ori, size, lea->GetRegNum(), memBase->GetRegNum(), 0); + } + } + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = lea->GetSingleTempReg(); + + // First load tmpReg with the large offset constant + emit->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + + // Then compute target reg from [memBase + tmpReg] + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + } + } + else if (lea->Index()) + { + // If we encounter a GT_LEA node without a base it means it came out + // when attempting to optimize an arbitrary arithmetic expression during lower. + // This is currently disabled in LOONGARCH64 since we need to adjust lower to account + // for the simpler instructions LOONGARCH64 supports. + // TODO-LOONGARCH64-CQ: Fix this and let LEA optimize arithmetic trees too. + assert(!"We shouldn't see a baseless address computation during CodeGen for LOONGARCH64"); + } + + genProduceReg(lea); +} + +//------------------------------------------------------------------------ +// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. +// +// Arguments: +// delta - the offset to add to the current stack pointer to establish the frame pointer +// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. 
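+//
+// Notes:
+//    A sketch of the emitted code: a zero delta becomes a simple register move of SP into FP,
+//    while a non-zero delta (asserted to fit in a signed 12-bit immediate) becomes
+//    `addi.d fp, sp, delta`.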
+ +void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) +{ + assert(compiler->compGeneratingProlog); + + if (delta == 0) + { + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + } + else + { + assert(emitter::isValidSimm12(delta)); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); + } + + if (reportUnwindData) + { + compiler->unwindSetFrameReg(REG_FPBASE, delta); + } +} + +//------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP. +// +// Notes: +// On LOONGARCH64, this only does the probing; allocating the frame is done when callee-saved registers are saved. +// This is done before anything has been pushed. The previous frame might have a large outgoing argument +// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might +// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, +// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard +// page by default, so we need to be more careful. We do an extra probe if we might not have probed +// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this +// on Windows as well just to be consistent, even though it should not be necessary. +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + assert(compiler->compGeneratingProlog); + + if (frameSize == 0) + { + return; + } + + const target_size_t pageSize = compiler->eeGetPageSize(); + + // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and + // if the next action on the stack might subtract from SP first, before touching the current SP, then + // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does + // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64, + // but we don't alter SP. + target_size_t lastTouchDelta = 0; + + assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); + + if (frameSize < pageSize) + { + lastTouchDelta = frameSize; + } + else if (frameSize < 3 * pageSize) + { + // We don't need a register for the target of the dummy load + // ld_w $0,offset(base) will ignor the addr-exception. + regNumber rTemp = REG_R0; + lastTouchDelta = frameSize; + + for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize) + { + // Generate: + // lw rTemp, -probeOffset(SP) // load into initReg + GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)probeOffset); + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, initReg); + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + + lastTouchDelta -= pageSize; + } + + assert(lastTouchDelta == frameSize % pageSize); + compiler->unwindPadding(); + } + else + { + assert(frameSize >= 3 * pageSize); + + // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change + // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl + // the stack afterward (which means the stack pointer needs to be known). + // + // LOONGARCH64 needs 2 registers. 
See VERY_LARGE_FRAME_SIZE_REG_MASK for how these + // are reserved. + + regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); + availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg + + regNumber rOffset = initReg; + regNumber rLimit; + regMaskTP tempMask; + + // We don't need a register for the target of the dummy load + // ld_w $0,offset(base) will ignor the addr-exception. + regNumber rTemp = REG_R0; + + // We pick the next lowest register number for rLimit + noway_assert(availMask != RBM_NONE); + tempMask = genFindLowestBit(availMask); + rLimit = genRegNumFromMask(tempMask); + availMask &= ~tempMask; + + // Generate: + // + // instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize); + // instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize); + // INS_lu12i_w, REG_R21, pageSize >> 12 + // + // loop: + // ldx_w rTemp, sp, rOffset, + // sub_d rOffset, rOffset, REG_R21 + // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this + // rOffset. + + noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int + + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, rOffset, -(ssize_t)pageSize >> 12); + regSet.verifyRegUsed(rOffset); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, rLimit, -(ssize_t)frameSize); + regSet.verifyRegUsed(rLimit); + + assert(!(pageSize & 0xfff)); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, pageSize >> 12); + + // There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic + // `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions. + + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, rOffset); + GetEmitter()->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, rOffset, rOffset, REG_R21); + + assert(REG_R21 != rLimit); + assert(REG_R21 != rOffset); + ssize_t imm = -2 << 2; + GetEmitter()->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, imm); + + *pInitRegZeroed = false; // The initReg does not contain zero + + compiler->unwindPadding(); + + lastTouchDelta = frameSize % pageSize; + } + + if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + { + + assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)frameSize); + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, REG_R0, REG_SPBASE, initReg); + compiler->unwindPadding(); + + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + } +} + +inline void CodeGen::genJumpToThrowHlpBlk_la( + SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) +{ + assert(INS_beq <= ins && ins <= INS_bgeu); + + bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks(); + + emitter* emit = GetEmitter(); + if (useThrowHlpBlk) + { + // For code with throw helper blocks, find and use the helper block for + // raising the exception. The block may be shared by other trees too. + + BasicBlock* excpRaisingBlock; + + if (failBlk != nullptr) + { + // We already know which block to jump to. Use that. 
+ excpRaisingBlock = failBlk; + +#ifdef DEBUG + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + assert(excpRaisingBlock == add->acdDstBlk); +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS +#endif // DEBUG + } + else + { + // Find the helper-block which raises the exception. + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + excpRaisingBlock = add->acdDstBlk; +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS + } + + noway_assert(excpRaisingBlock != nullptr); + + // Jump to the exception-throwing block on error. + emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; + } + else + { + // The code to throw the exception will be generated inline, and + // we will jump around it in the normal non-exception case. + + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + // maybe optimize + // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); + if (ins == INS_blt) + { + ins = INS_bge; + } + else if (ins == INS_bltu) + { + ins = INS_bgeu; + } + else if (ins == INS_bge) + { + ins = INS_blt; + } + else if (ins == INS_bgeu) + { + ins = INS_bltu; + } + else + { + ins = ins == INS_beq ? INS_bne : INS_beq; + } + + if (addr == nullptr) + { + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl. + + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + if (compiler->opts.compReloc) + { + ssize_t imm = (2 + 1) << 2; // , 1=jirl. + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + ssize_t imm = (3 + 1) << 2; // , 1=jirl. 
+ emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + ((ssize_t)pAddr & 0xfff) >> 2); + } + } + else + { // INS_OPTS_C + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + + ssize_t imm = 5 << 2; + if (compiler->opts.compReloc) + { + imm = 3 << 2; + } + + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + } + + emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); + regSet.verifyRegistersUsed(killMask); + } +} + +//----------------------------------------------------------------------------------- +// instGen_MemoryBarrier: Emit a MemoryBarrier instruction +// +// Arguments: +// barrierKind - kind of barrier to emit (Only supports the Full now!! This depends on the CPU). +// +// Notes: +// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1 +// +void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) +{ +#ifdef DEBUG + if (JitConfig.JitNoMemoryBarriers() == 1) + { + return; + } +#endif // DEBUG + + // TODO-LOONGARCH64: Use the exact barrier type depending on the CPU. + GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. 
+ if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +/*----------------------------------------------------------------------------- + * + * Push/Pop any callee-saved registers we have used + */ +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On LA we push the FP (frame-pointer) here along with all other callee saved registers + if (isFramePointerUsed()) + { + rsPushRegs |= RBM_FPBASE; + } + + // + // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require + // changes in GC suspension architecture. + // + // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we + // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf + // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends + // on the return address to be saved on the stack. If we skipped pushing/popping ra, the return address would never + // be saved on the stack and the GC suspension would time out. + // + // So if we wanted to skip pushing/popping ra for leaf frames, we would also need to do one of + // the following to make GC suspension work in the above scenario: + // - Make return address hijacking work even when ra is not saved on the stack. + // - Generate fully interruptible code for loops that contains calls + // - Generate fully interruptible code for leaf methods + // + // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity + // is not worth it. + // + + rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). + regSet.rsMaskCalleeSaved = rsPushRegs; + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or + // requirements and + // options. Case numbers in comments here refer to this document. See also Compiler::lvaAssignFrameOffsets() + // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) + // for pictures of the funclet frame layouts. + // + // For most frames, generate, e.g.: + // sdc1 f31, off+7*8(sp) + // ... + // sdc1 f24, off(sp) + // + // sd s7, off2+7*8(sp) + // ... + // sd s1, off2+8(sp) + // sd s0, off2(sp) + // + // sd fp, 0(sp) + // sd ra, 8(sp) + // + // Notes: + // 1. FP is always saved, and the first store is FP, RA. + // 2. 
General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. + // TODO-LOONGARCH64: supporting SIMD feature ! + // 3. For frames with varargs, not implemented completely and not tested ! + // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). + // + // For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top + // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same + // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. + // + // The frames look like the following (simplified to only include components that matter for establishing the + // frames). See also Compiler::lvaAssignFrameOffsets(). + // + // + // Frames with FP, RA saved at bottom of frame (above outgoing argument space): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + // + // Frames with FP, RA saved at top of frame (note: above all callee-saved regs): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + + int totalFrameSize = genTotalFrameSize(); + + int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. + +#ifdef DEBUG + if (verbose) + { + printf("Save float regs: "); + dspRegMask(maskSaveRegsFloat); + printf("\n"); + printf("Save int regs: "); + dspRegMask(maskSaveRegsInt); + printf("\n"); + } +#endif // DEBUG + + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we + // generate based on various sizes. 
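+    //
+    // In outline, the common small-frame prolog (the frameType==1 case below) is:
+    //      addi.d  sp, sp, -framesz       // allocate the whole frame
+    //      st.d    fp, outsz(sp)          // save FP just above the outgoing argument area
+    //      st.d    ra, outsz+8(sp)
+    //      st.d    s0, ...                // save the remaining callee-saved registers
+    //      addi.d  fp, sp, outsz          // establish the frame pointer chain
+    // where outsz is lvaOutgoingArgSpaceSize and framesz is the total frame size computed below.
+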
+    int frameType = 0;
+
+    // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the
+    // first save instruction as a "predecrement" amount, if possible.
+    int calleeSaveSPDelta = 0;
+
+    // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.)
+    bool establishFramePointer = true;
+
+    // If we do establish the frame pointer, what is the amount we add to SP to do so?
+    unsigned offsetSpToSavedFp = 0;
+
+    if (isFramePointerUsed())
+    {
+        // We need to save both FP and RA.
+
+        assert((maskSaveRegsInt & RBM_FP) != 0);
+        assert((maskSaveRegsInt & RBM_RA) != 0);
+
+        // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
+        // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses,
+        // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
+        // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
+        // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above
+        // the GS cookie.
+        //
+        // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
+        // create a frame pointer chain.
+        //
+
+        if (totalFrameSize < 2048)
+        {
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize);
+            compiler->unwindAllocStack(totalFrameSize);
+
+            // Case #1.
+            //
+            // Generate:
+            //      addi.d  sp, sp, -framesz
+            //      st.d    fp, outsz(sp)
+            //      st.d    ra, outsz+8(sp)
+            //
+            // The (totalFrameSize < 2048) condition ensures that the st.d/ld.d offsets fit in a signed 12-bit
+            // immediate.
+            //
+            // After saving the callee-saved registers, we establish the frame pointer with:
+            //      addi.d  fp, sp, offset-fp
+            // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+            JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+            frameType = 1;
+
+            offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
+
+            GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp);
+            compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp);
+
+            GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8);
+            compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8);
+
+            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
+
+            offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA
+        }
+        else
+        {
+            JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+            frameType = 2;
+
+            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
+
+            offset            = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
+            calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
+            offset            = calleeSaveSPDelta - offset;
+        }
+    }
+    else
+    {
+        // No frame pointer (no chaining).
+        assert((maskSaveRegsInt & RBM_FP) == 0);
+        assert((maskSaveRegsInt & RBM_RA) != 0);
+
+        // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using
+        // 'st.d' if we only have one callee-saved register plus RA to save.
+ + NYI("Frame without frame pointer"); + offset = 0; + } + + assert(frameType != 0); + + JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); + + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + if (compiler->info.compIsVarArgs) + { + JITDUMP(" compIsVarArgs=true\n"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); + } +#endif + if (frameType == 1) + { + // offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 2) + { + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offsetSpToSavedFp = offset; + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + + calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + } + + establishFramePointer = false; + } + else + { + unreached(); + } + + if (establishFramePointer) + { + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + } +} + +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) +{ + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) + { + rsRestoreRegs |= RBM_FPBASE; + } + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. + int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. 
+ + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 2; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); + } + } + } + else + { + // No frame pointer (no chaining). 
+ NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + // had done. + } + else + { + unreached(); + } +} + +void CodeGen::genFnPrologCalleeRegArgs() +{ + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + + regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnPrologCalleeRegArgs() LOONGARCH64:0x%llx.\n", regArgMaskLive); + } +#endif + + // We should be generating the prolog block when we are called + assert(compiler->compGeneratingProlog); + + // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. + noway_assert(regArgMaskLive != 0); + + unsigned varNum; + unsigned regArgsVars[MAX_REG_ARG * 2] = {0}; + unsigned regArgNum = 0; + for (varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Is this variable a register arg? + if (!varDsc->lvIsParam) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + continue; + } + + if (varDsc->lvIsInReg()) + { + assert(genIsValidIntReg(varDsc->GetArgReg()) || genIsValidFloatReg(varDsc->GetArgReg())); + assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); + if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) + { + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, + varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + regArgsVars[regArgNum] = varNum; + regArgNum++; + } + } + else + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); +#ifdef USING_SCOPE_INFO + psiMoveToReg(varNum); +#endif // USING_SCOPE_INFO + if (!varDsc->lvLiveInOutOfHndlr) + { + continue; + } + } + + // When we have a promoted struct we have two possible LclVars that can represent the incoming argument + // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. + // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise + // use the the original TYP_STRUCT argument. + // + if (varDsc->lvPromoted || varDsc->lvIsStructField) + { + assert(!"-------------Should confirm on Loongarch!"); + } + + var_types storeType = TYP_UNDEF; + unsigned slotSize = TARGET_POINTER_SIZE; + + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetArgReg())) + { + storeType = varDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; + } + else + { + assert(emitter::isGeneralRegister(varDsc->GetArgReg())); + if (varDsc->lvIs4Field1) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(0); + } + } + slotSize = (unsigned)emitActualTypeSize(storeType); + +#if FEATURE_MULTIREG_ARGS + // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif + } + else // Not a struct type + { + storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + if (emitter::isFloatReg(varDsc->GetArgReg()) != varTypeIsFloating(storeType)) + { + assert(varTypeIsFloating(storeType)); + storeType = storeType == TYP_DOUBLE ? TYP_I_IMPL : TYP_INT; + } + } + emitAttr size = emitActualTypeSize(storeType); + + regNumber srcRegNum = varDsc->GetArgReg(); + + // Stack argument - if the ref count is 0 don't care about it + if (!varDsc->lvOnFrame) + { + noway_assert(varDsc->lvRefCnt() == 0); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetOtherArgReg() < REG_STK) + { + regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } + } + else + { + assert(srcRegNum != varDsc->GetOtherArgReg()); + + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; + + bool FPbased; + int baseOffset = 0; //(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + + base += baseOffset; + + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + assert(tmp_reg == REG_NA); + + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); + } + + regArgMaskLive &= ~genRegMask(srcRegNum); + + // Check if we are writing past the end of the struct + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); + baseOffset = baseOffset < (int)size ? (int)size : baseOffset; + srcRegNum = varDsc->GetOtherArgReg(); + } + else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(slotSize); + if (varDsc->lvIs4Field2) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(1); + } + size = emitActualTypeSize(storeType); + if (baseOffset < (int)EA_SIZE(size)) + { + baseOffset = (int)EA_SIZE(size); + } + srcRegNum = varDsc->GetOtherArgReg(); + } + + if (srcRegNum == varDsc->GetOtherArgReg()) + { + base += baseOffset; + + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. 
+ GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, baseOffset); + } + } + regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! + } + else if (varDsc->lvIsSplit) + { + assert(varDsc->GetArgReg() == REG_ARG_LAST && varDsc->GetOtherArgReg() == REG_STK); + baseOffset = 8; + base += 8; + + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, baseOffset); + } + } + } + } + +#ifdef USING_SCOPE_INFO + { + psiMoveToStack(varNum); + } +#endif // USING_SCOPE_INFO + } + } + + while (regArgNum > 0) + { + varNum = regArgsVars[regArgNum - 1]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) + { + var_types destMemType = varDsc->TypeGet(); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), + varDsc->GetArgReg()); + regArgNum--; + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + for (unsigned i = 0; i < regArgNum; i++) + { + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, + emitActualTypeSize(destMemType)); + regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); + } + break; + } + } + + assert(!regArgMaskLive); +} + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// set to non-zero value after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + unsigned alignment = 0; + unsigned opSize = 0; + + if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp()) + { + opSize = InferStructOpSizeAlign(op, &alignment); + } + else + { + alignment = genTypeAlignments[op->TypeGet()]; + opSize = genTypeSizes[op->TypeGet()]; + } + + assert(opSize != 0); + assert(alignment != 0); + + (*alignmentWB) = alignment; + return opSize; +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + unsigned alignment = 0; + unsigned opSize = 0; + + while (op->gtOper == GT_COMMA) + { + op = op->AsOp()->gtOp2; + } + + if (op->gtOper == GT_OBJ) + { + CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->GetLayout()->GetClassHandle(); + opSize = op->AsObj()->GetLayout()->GetSize(); + alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); + } + else if (op->gtOper == GT_LCL_VAR) + { + const LclVarDsc* varDsc = compiler->lvaGetDesc(op->AsLclVarCommon()); + assert(varDsc->lvType == TYP_STRUCT); + opSize = varDsc->lvSize(); + { + alignment = TARGET_POINTER_SIZE; + } + } + else if (op->gtOper == GT_MKREFANY) + { + opSize = TARGET_POINTER_SIZE * 2; + alignment = TARGET_POINTER_SIZE; + } + else if (op->IsArgPlaceHolderNode()) + { + CORINFO_CLASS_HANDLE clsHnd = op->AsArgPlace()->gtArgPlaceClsHnd; + assert(clsHnd != 0); + opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); + alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); + } + else + { + assert(!"Unhandled gtOper"); + opSize = TARGET_POINTER_SIZE; + alignment = TARGET_POINTER_SIZE; + } + + assert(opSize != 0); + assert(alignment != 0); + + (*alignmentWB) = alignment; + return opSize; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5d7b518aadd4f..b30388047efe0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -537,12 +537,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_SHORT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 3: useType = TYP_INT; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #ifdef TARGET_64BIT case 4: @@ -550,14 +550,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_INT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 5: case 6: case 7: useType = TYP_I_IMPL; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #endif // TARGET_64BIT case TARGET_POINTER_SIZE: @@ -749,10 +749,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } -#elif defined(TARGET_X86) || defined(TARGET_ARM) +#elif 
defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by value on the stack // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI + // On LOONGARCH64 struct that is 1-16 bytes is passed by value in one/two register(s) howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; @@ -776,7 +777,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -901,6 +902,22 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; } +#elif TARGET_LOONGARCH64 + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); + + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + howToReturnStruct = SPK_PrimitiveType; + useType = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT; + } + else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + { + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + } #endif if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && !isNativePrimitiveStructType(clsHnd)) @@ -1043,6 +1060,12 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; +#elif defined(TARGET_LOONGARCH64) + + // On LOONGARCH64 struct that is 1-16 bytes is returned by value in one/two register(s) + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -2218,6 +2241,11 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; + +#elif defined(TARGET_LOONGARCH64) + + info.genCPU = CPU_LOONGARCH64; + #endif // @@ -3888,7 +3916,7 @@ void Compiler::compSetOptimizationLevel() fgCanRelocateEHRegions = true; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Function compRsvdRegCheck: // given a curState to use for calculating the total frame size // it will return true if the REG_OPT_RSVD should be reserved so @@ -3933,6 +3961,10 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) JITDUMP(" Returning true (ARM64)\n\n"); return true; // just always assume we'll need it, for now +#elif defined(TARGET_LOONGARCH64) + JITDUMP(" Returning true (LOONGARCH64)\n\n"); + return true; // just always assume we'll need it, for now + #else // TARGET_ARM // frame layout: @@ -4056,7 +4088,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) return false; #endif // TARGET_ARM } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 //------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4b378ce991ff6..3dc4f23f18d1d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -481,9 +481,15 @@ class LclVarDsc unsigned char 
lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + +#if defined(TARGET_LOONGARCH64) + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. +#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -1014,7 +1020,7 @@ class LclVarDsc } #endif assert(m_layout != nullptr); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && (TypeGet() == TYP_BYREF))); #else assert(varTypeIsStruct(TypeGet())); @@ -1623,7 +1629,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section @@ -1638,7 +1644,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(FEATURE_CFI_SUPPORT) jitstd::vector* cfiCodes; @@ -1669,6 +1675,12 @@ struct fgArgTabEntry unsigned numRegs; // Count of number of registers that this argument uses. // Note that on ARM, if we have a double hfa, this reflects the number // of DOUBLE registers. +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which has float field(s) and no more than two fields + // may be passed by float register(s). + // e.g `struct {int a; float b;}` passed by an integer register and a float register. + var_types structFloatFieldType[2]; +#endif #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs @@ -2158,7 +2170,7 @@ struct fgArgTabEntry // register numbers. 
void SetMultiRegNums() { -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) if (numRegs == 1) { return; @@ -2179,7 +2191,7 @@ struct fgArgTabEntry argReg = (regNumber)(argReg + regSize); setRegNum(regIndex, argReg); } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -2297,6 +2309,20 @@ class fgArgInfo const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); #endif // UNIX_AMD64_ABI +#if defined(TARGET_LOONGARCH64) + fgArgTabEntry* AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber nextOtherRegNum); +#endif + fgArgTabEntry* AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -4057,7 +4083,7 @@ class Compiler // For ARM64, this is structs larger than 16 bytes that are passed by reference. bool lvaIsImplicitByRefLocal(unsigned varNum) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LclVarDsc* varDsc = lvaGetDesc(varNum); if (varDsc->lvIsImplicitByRef) { @@ -4066,7 +4092,7 @@ class Compiler assert(varTypeIsStruct(varDsc) || (varDsc->lvType == TYP_BYREF)); return true; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return false; } @@ -8324,6 +8350,9 @@ class Compiler #elif defined(TARGET_ARM64) reg = REG_R11; regMask = RBM_R11; +#elif defined(TARGET_LOONGARCH64) + reg = REG_T8; + regMask = RBM_T8; #else #error Unsupported or unset target architecture #endif @@ -8732,6 +8761,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + void unwindNop(); + void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last + // instruction and the current location. + void unwindSaveReg(regNumber reg, int offset); + void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); + void unwindReturn(regNumber reg); +#endif // defined(TARGET_LOONGARCH64) + // // Private "helper" functions for the unwind implementation. // @@ -8817,9 +8855,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; +#elif defined(TARGET_LOONGARCH64) + // TODO: supporting SIMD feature for LoongArch64. 
+ assert(!"unimplemented yet on LA"); + CORINFO_InstructionSet minimumIsa = 0; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 return compOpportunisticallyDependsOn(minimumIsa); #else @@ -10272,6 +10314,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_ARM 0x0300 // The generic ARM CPU #define CPU_ARM64 0x0400 // The generic ARM64 CPU +#define CPU_LOONGARCH64 0x0800 // The generic LOONGARCH64 CPU + unsigned genCPU; // What CPU are we running on // Number of class profile probes in this method @@ -10797,7 +10841,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void compSetProcessor(); void compInitDebuggingInfo(); void compSetOptimizationLevel(); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool compRsvdRegCheck(FrameLayoutState curState); #endif void compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags); @@ -12149,6 +12193,13 @@ const instruction INS_SQRT = INS_fsqrt; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 +const instruction INS_BREAKPOINT = INS_break; +const instruction INS_MULADD = INS_fmadd_d; // NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d; // NOTE: default is double. +#endif // TARGET_LOONGARCH64 + /*****************************************************************************/ extern const BYTE genTypeSizes[]; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index b74234165b03f..f3aeff3c2e54e 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -602,7 +602,7 @@ inline bool isRegParamType(var_types type) #endif // !TARGET_X86 } -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) +#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. @@ -660,7 +660,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ return result; } -#endif // TARGET_AMD64 || TARGET_ARMARCH +#endif // TARGET_AMD64 || TARGET_ARMARCH || TARGET_LOONGARCH64 /*****************************************************************************/ @@ -1108,7 +1108,7 @@ inline GenTreeField* Compiler::gtNewFieldRef(var_types type, CORINFO_FIELD_HANDL LclVarDsc* varDsc = lvaGetDesc(obj->AsUnOp()->gtOp1->AsLclVarCommon()); varDsc->lvFieldAccessed = 1; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // These structs are passed by reference and can easily become global // references if those references are exposed. 
We clear out // address-exposure information for these parameters when they are @@ -1120,7 +1120,7 @@ inline GenTreeField* Compiler::gtNewFieldRef(var_types type, CORINFO_FIELD_HANDL { fieldNode->gtFlags |= GTF_GLOB_REF; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } else { @@ -1837,10 +1837,10 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (doubleWeight && (weight * 2 > weight)) { @@ -3084,6 +3084,8 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) #ifdef TARGET_ARM return regNum - REG_F0; +#elif defined(TARGET_LOONGARCH64) + return regNum - REG_F0; #elif defined(TARGET_ARM64) return regNum - REG_V0; #elif defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index fc354aaffc563..4f0556f1b8ec7 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -444,6 +444,14 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* } } } +#elif defined(TARGET_LOONGARCH64) + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } +// otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index a507cadf36aed..ba73a2f8e09f3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1118,6 +1118,10 @@ void emitter::emitBegFN(bool hasFramePtr emitFirstColdIG = nullptr; emitTotalCodeSize = 0; +#ifdef TARGET_LOONGARCH64 + emitCounts_INS_OPTS_J = 0; +#endif + #if EMITTER_STATS emitTotalIGmcnt++; emitSizeMethod = 0; @@ -1296,6 +1300,13 @@ weight_t emitter::getCurrentBlockWeight() } } +#if defined(TARGET_LOONGARCH64) +void emitter::dispIns(instrDesc* id) +{ + // For LoongArch64 using the emitDisInsName(). + NYI_LOONGARCH64("Not used on LOONGARCH64."); +} +#else void emitter::dispIns(instrDesc* id) { #ifdef DEBUG @@ -1317,6 +1328,7 @@ void emitter::dispIns(instrDesc* id) emitIFcounts[id->idInsFmt()]++; #endif } +#endif void emitter::appendToCurIG(instrDesc* id) { @@ -2305,6 +2317,11 @@ void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi) #ifdef TARGET_AMD64 // doesn't have to be all negative on amd printf("-%04X ... %04X\n", -offsLo, offsHi); +#elif defined(TARGET_LOONGARCH64) + if (offsHi < 0) + printf("-%04X ... -%04X\n", -offsLo, -offsHi); + else + printf("-%04X ... %04X\n", -offsLo, offsHi); #else printf("-%04X ... -%04X\n", -offsLo, -offsHi); assert(offsHi <= 0); @@ -2638,7 +2655,7 @@ const char* emitter::emitLabelString(insGroup* ig) #endif // DEBUG -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Does the argument location point to an IG at the end of a function or funclet? 
// We can ignore the codePos part of the location, since it doesn't affect the @@ -2999,9 +3016,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) Compiler* comp = (Compiler*)context; #if defined(TARGET_ARM) comp->unwindNop(id->idCodeSize()); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) comp->unwindNop(); -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -3015,7 +3032,7 @@ void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp) emitWalkIDs(locFrom, emitGenerateUnwindNop, comp); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(TARGET_ARM) @@ -3402,6 +3419,9 @@ const size_t hexEncodingSize = 19; #elif defined(TARGET_ARM) const size_t basicIndent = 12; const size_t hexEncodingSize = 11; +#elif defined(TARGET_LOONGARCH64) +const size_t basicIndent = 12; +const size_t hexEncodingSize = 19; #endif #ifdef DEBUG @@ -4083,8 +4103,10 @@ void emitter::emitDispCommentForHandle(size_t handle, GenTreeFlags flag) * ARM64 has a small and large encoding for both conditional branch and loading label addresses. * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently * NYI). + * LoongArch64 has an individual implementation for emitJumpDistBind(). */ +#ifndef TARGET_LOONGARCH64 void emitter::emitJumpDistBind() { #ifdef DEBUG @@ -4835,6 +4857,7 @@ void emitter::emitJumpDistBind() emitCheckIGoffsets(); #endif // DEBUG } +#endif #if FEATURE_LOOP_ALIGN @@ -5645,6 +5668,11 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { +#ifdef TARGET_LOONGARCH64 + // TODO-LoongArch64: support idDebugOnlyInfo. + return; +#else + #ifdef DEBUG // We should not be jumping/branching across funclets/functions // Except possibly a 'call' to a finally funclet for a local unwind @@ -5740,6 +5768,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) } } #endif // DEBUG +#endif } /***************************************************************************** @@ -6523,7 +6552,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } -#endif // TARGET_XARCH +#elif defined(TARGET_LOONGARCH64) + + isJccAffectedIns = true; + +#endif // TARGET_LOONGARCH64 // Jcc affected instruction boundaries were printed above; handle other cases here. if (!isJccAffectedIns) @@ -6693,6 +6726,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARM64) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6706,6 +6742,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARMARCH) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. 
+ unreached(); #else #error Unsupported or unset target architecture #endif @@ -8678,7 +8717,7 @@ cnsval_ssize_t emitter::emitGetInsSC(instrDesc* id) int adr = emitComp->lvaFrameAddress(varNum, &FPbased); int dsp = adr + offs; if (id->idIns() == INS_sub) - dsp = -dsp; + dsp = -dsp; #endif return dsp; } @@ -9290,13 +9329,14 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) // This uses and defs RDI and RSI. result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); #endif +#if !defined(TARGET_LOONGARCH64) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -9313,8 +9353,9 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; +#endif // !defined(TARGET_LOONGARCH64) -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: result = RBM_CALLEE_GCTRASH_WRITEBARRIER; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 1571fc00cbafa..60971839bc507 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -590,15 +590,21 @@ class emitter #define INSTR_ENCODED_SIZE 4 static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64: not include SIMD-vector. + static_assert_no_msg(INS_count <= 512); + instruction _idIns : 9; +#else static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // The format for the instruction #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; +#elif defined(TARGET_LOONGARCH64) + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -624,7 +630,16 @@ class emitter return idInsIs(ins) || idInsIs(rest...); } +#if defined(TARGET_LOONGARCH64) insFormat idInsFmt() const + { // not used for LOONGARCH64. + return (insFormat)0; + } + void idInsFmt(insFormat insFmt) + { + } +#else + insFormat idInsFmt() const { return _idInsFmt; } @@ -636,6 +651,7 @@ class emitter assert(insFmt < IF_COUNT); _idInsFmt = insFmt; } +#endif void idSetRelocFlags(emitAttr attr) { @@ -649,6 +665,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits + // loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -656,11 +673,11 @@ class emitter opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32 // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. -#elif defined(TARGET_ARM64) -// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields -#else // ARM - opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 -#endif // ARM +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +/* _idOpSize defined below. 
*/ +#else + opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 +#endif // ARM || TARGET_LOONGARCH64 // On Amd64, this is where the second DWORD begins // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that @@ -708,6 +725,14 @@ class emitter unsigned _idLclVar : 1; // access a local on stack #endif +#ifdef TARGET_LOONGARCH64 + // TODO-LoongArch64: maybe delete on future. + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the + // accessing a local on stack. + unsigned _idLclVar : 1; // access a local on stack. +#endif + #ifdef TARGET_ARM insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits insFlags _idInsFlags : 1; // will this instruction set the flags @@ -721,8 +746,8 @@ class emitter #elif defined(TARGET_ARM64) // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) -#elif defined(TARGET_XARCH) - // For xarch, we have used 14 bits from the second DWORD. +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) + // For xarch and LoongArch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -734,6 +759,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits + // loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -746,6 +772,7 @@ class emitter // amd64: 48 bits // arm: 50 bits // arm64: 51 bits + // loongarch64: 48 bits CLANG_FORMAT_COMMENT_ANCHOR; #define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS) @@ -823,7 +850,7 @@ class emitter // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts // about reading what we think is here, to avoid unexpected corruption issues. -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) emitLclVarAddr iiaLclVar; #endif BasicBlock* iiaBBlabel; @@ -877,7 +904,38 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#endif // defined(TARGET_XARCH) +#elif defined(TARGET_LOONGARCH64) + struct + { + unsigned int iiaEncodedInstr; // instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; + }; + + struct + { + int iiaJmpOffset; // temporary saving the offset of jmp or data. + emitLclVarAddr iiaLclVar; + }; + + void iiaSetInstrEncode(unsigned int encode) + { + iiaEncodedInstr = encode; + } + unsigned int iiaGetInstrEncode() const + { + return iiaEncodedInstr; + } + + void iiaSetJmpOffset(int offset) + { + iiaJmpOffset = offset; + } + int iiaGetJmpOffset() const + { + return iiaJmpOffset; + } +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -977,7 +1035,20 @@ class emitter _idInsFlags = sf; assert(sf == _idInsFlags); } -#endif // TARGET_ARM + +#elif defined(TARGET_LOONGARCH64) + unsigned idCodeSize() const + { + return _idCodeSize; + } + void idCodeSize(unsigned sz) + { + // LoongArch64's instrDesc is not always meaning only one instruction. + // e.g. the `emitter::emitIns_I_la` for emitting the immediates. 
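+        // Every LoongArch64 instruction is 4 bytes, so one instrDesc covers at most four instructions
+        // (e.g. a 64-bit immediate materialized by emitIns_I_la); hence the limit of 16 bytes below.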
+ assert(sz <= 16); + _idCodeSize = sz; + } +#endif // TARGET_LOONGARCH64 emitAttr idOpSize() { @@ -1102,6 +1173,42 @@ class emitter #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + insOpts idInsOpt() const + { + return (insOpts)_idInsOpt; + } + void idInsOpt(insOpts opt) + { + _idInsOpt = opt; + assert(opt == _idInsOpt); + } + + regNumber idReg3() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg3; + } + void idReg3(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg3 = reg; + assert(reg == idAddr()->_idReg3); + } + regNumber idReg4() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg4; + } + void idReg4(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg4 = reg; + assert(reg == idAddr()->_idReg4); + } + +#endif // TARGET_LOONGARCH64 + inline static bool fitsInSmallCns(ssize_t val) { return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS)); @@ -1190,6 +1297,17 @@ class emitter } #endif // defined(TARGET_ARM) +#ifdef TARGET_LOONGARCH64 + bool idIsLclVar() const + { + return _idLclVar != 0; + } + void idSetIsLclVar() + { + _idLclVar = 1; + } +#endif // TARGET_LOONGARCH64 + bool idIsCnsReloc() const { return _idCnsReloc != 0; @@ -1340,6 +1458,23 @@ class emitter #define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C +#elif defined(TARGET_LOONGARCH64) +// a read,write or modify from stack location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C + +// a read, write or modify from constant location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C + +// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache +// plus an extra cost (of 1.0) for a increased chance of a cache miss +#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C +#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C + #endif // TARGET_XXX // Make this an enum: @@ -1750,6 +1885,10 @@ class emitter #endif // defined(TARGET_X86) #endif // !defined(HOST_64BIT) +#ifdef TARGET_LOONGARCH64 + unsigned int emitCounts_INS_OPTS_J; +#endif // TARGET_LOONGARCH64 + size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -1815,7 +1954,7 @@ class emitter // CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // ARM32 and ARM64 both can require a bigger prolog instruction group. One scenario is where // a function uses all the incoming integer and single-precision floating-point arguments, // and must store them all to the frame on entry. If the frame is very large, we generate @@ -1829,9 +1968,10 @@ class emitter // ldr w8, [fp, xip1] // [V10 arg10] // which eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (200 * sizeof(emitter::instrDesc)) -#else // !TARGET_ARMARCH + +#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_ARMARCH +#endif // !(TARGET_ARMARCH || TARGET_LOONGARCH64) size_t emitIGbuffSize; @@ -2013,7 +2153,7 @@ class emitter const char* emitLabelString(insGroup* ig); #endif -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); @@ -2027,7 +2167,7 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2197,8 +2337,10 @@ class emitter static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind); #ifdef DEBUG +#ifndef TARGET_LOONGARCH64 void emitInsSanityCheck(instrDesc* id); #endif +#endif #ifdef TARGET_ARMARCH // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC @@ -2218,7 +2360,10 @@ class emitter // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair. bool emitInsWritesToLclVarStackLocPair(instrDesc* id); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + bool emitInsMayWriteToGCReg(instruction ins); + bool emitInsWritesToLclVarStackLoc(instrDesc* id); +#endif // TARGET_LOONGARCH64 /************************************************************************/ /* The following is used to distinguish helper vs non-helper calls */ diff --git a/src/coreclr/jit/emitdef.h b/src/coreclr/jit/emitdef.h index c9f003ccce1b6..35b46314a1225 100644 --- a/src/coreclr/jit/emitdef.h +++ b/src/coreclr/jit/emitdef.h @@ -12,6 +12,8 @@ #include "emitarm.h" #elif defined(TARGET_ARM64) #include "emitarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitloongarch64.h" #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/jit/emitfmts.h b/src/coreclr/jit/emitfmts.h index c252c0b1237d3..77712ed95cce3 100644 --- a/src/coreclr/jit/emitfmts.h +++ b/src/coreclr/jit/emitfmts.h @@ -8,6 +8,8 @@ #include "emitfmtsarm.h" #elif defined(TARGET_ARM64) #include "emitfmtsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitfmtsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h new file mode 100644 index 0000000000000..3dab2b7dc2704 --- /dev/null +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +////////////////////////////////////////////////////////////////////////////// +// define this file for LoongArch64 just for avoiding compiling errors. +// This is moot right now. 
+ +// clang-format off +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifdef DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +enum ID_OPS +{ + ID_OP_NONE, // no additional arguments +}; + +#undef DEFINE_ID_OPS + +////////////////////////////////////////////////////////////////////////////// +#else // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#ifndef IF_DEF +#error Must define IF_DEF macro before including this file +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// enum insFormat instruction enum ID_OPS +// scheduling +// (unused) +////////////////////////////////////////////////////////////////////////////// + +IF_DEF(NONE, IS_NONE, NONE) + +////////////////////////////////////////////////////////////////////////////// +#undef IF_DEF +////////////////////////////////////////////////////////////////////////////// + +#endif // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// +// clang-format on diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 484eca3399b4e..82c78299efebd 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -335,6 +335,36 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) id->idReg2((regNumber)encodeMask); // Save in idReg2 +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = 0; + + if ((regmask & RBM_S0) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S1) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S2) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S3) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S4) != RBM_NONE) + encodeMask |= 0x10; + + id->idReg1((regNumber)encodeMask); // Save in idReg1 + + encodeMask = 0; + + if ((regmask & RBM_S5) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S6) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S7) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S8) != RBM_NONE) + encodeMask |= 0x08; + + id->idReg2((regNumber)encodeMask); // Save in idReg2 + #else NYI("unknown target"); #endif @@ -447,6 +477,32 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) if ((encodeMask & 0x10) != 0) regmask |= RBM_R28; +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = id->idReg1(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S0; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S1; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S2; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S3; + if ((encodeMask & 0x10) != 0) + regmask |= RBM_S4; + + encodeMask = id->idReg2(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S5; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S6; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S7; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S8; + #else NYI("unknown target"); #endif diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 4ed340302119d..cd10727f6eec3 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -46,6 +46,13 @@ JMP_SMALL(lt , ge , blt ) // LT JMP_SMALL(gt , le , bgt ) // GT JMP_SMALL(le , gt , ble ) // LE +#elif defined(TARGET_LOONGARCH64) + +// TODO-LOONGARCH64: adding other condition branches. 
+JMP_SMALL(jmp , jmp , b ) +JMP_SMALL(eq , ne , beq ) // EQ +JMP_SMALL(ne , eq , bne ) // NE + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp new file mode 100644 index 0000000000000..9fb3e1f9cac1c --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -0,0 +1,6781 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX emitloongarch64.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +/*****************************************************************************/ +/*****************************************************************************/ + +#include "instr.h" +#include "emit.h" +#include "codegen.h" + +////These are used for loongarch64 instrs's dump. +////LA_OP_2R opcode: bit31 ~ bit10 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 +#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define 
LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 + +////LA_OP_3R opcode: bit31 ~ bit15 +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 
0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff + +////LA_OP_4R opcode: bit31 ~ bit20 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 + +////LA_OP_2RI8 + +////LA_OP_2RI12 opcode: bit31 ~ bit22 +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf + +////LA_OP_2RI14i opcode: bit31 ~ bit24 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 + +////LA_OP_2RI16 opcode: bit31 ~ bit26 +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b + +////LA_OP_1RI20 opcode: bit31 ~ bit25 +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf + +////LA_OP_I26 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 + +////LA_OP_1RI21 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 + +////other +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define 
LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 + +//// add other define-macro here. + +/*****************************************************************************/ + +const instruction emitJumpKindInstructions[] = { + INS_nop, + +#define JMP_SMALL(en, rev, ins) INS_##ins, +#include "emitjmps.h" +}; + +const emitJumpKind emitReverseJumpKinds[] = { + EJ_NONE, + +#define JMP_SMALL(en, rev, ins) EJ_##rev, +#include "emitjmps.h" +}; + +/***************************************************************************** + * Look up the instruction for a jump kind + */ + +/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) +{ + assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); + return emitJumpKindInstructions[jumpKind]; +} + +/***************************************************************************** +* Look up the jump kind for an instruction. It better be a conditional +* branch instruction with a jump kind! +*/ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ + NYI_LOONGARCH64("emitInsToJumpKind-----unimplemented on LOONGARCH64 yet----"); + return EJ_NONE; +} + +/***************************************************************************** + * Reverse the conditional jump + */ + +/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) +{ + assert(jumpKind < EJ_COUNT); + return emitReverseJumpKinds[jumpKind]; +} + +/***************************************************************************** + * + * Return the allocated size (in bytes) of the given instruction descriptor. + */ + +size_t emitter::emitSizeOfInsDsc(instrDesc* id) +{ + if (emitIsScnsInsDsc(id)) + return SMALL_IDSC_SIZE; + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_JIRL: + case INS_OPTS_J_cond: + case INS_OPTS_J: + return sizeof(instrDescJmp); + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + return sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + return sizeof(instrDesc); + } + + case INS_OPTS_I: + case INS_OPTS_RC: + case INS_OPTS_RL: + case INS_OPTS_RELOC: + case INS_OPTS_NONE: + return sizeof(instrDesc); + default: + NO_WAY("unexpected instruction descriptor format"); + break; + } +} + +inline bool emitter::emitInsMayWriteToGCReg(instruction ins) +{ + assert(ins != INS_invalid); + // NOTE: please reference the file "instrsloongarch64.h" for details !!! + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; +} + +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. 
+ + switch (ins) + { + case INS_st_d: + case INS_st_w: + case INS_st_b: + case INS_st_h: + case INS_stptr_d: + case INS_stx_d: + case INS_stx_w: + case INS_stx_b: + case INS_stx_h: + return true; + + default: + return false; + } +} + +#define LD 1 +#define ST 2 + +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INST(id, nm, info, e1) info, + #include "instrs.h" +}; +// clang-format on + +//------------------------------------------------------------------------ +// emitInsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} + +//------------------------------------------------------------------------ +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} + +//------------------------------------------------------------------------- +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. +// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} + +#undef LD +#undef ST + +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction. + */ + +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) +{ + code_t code = BAD_CODE; + + // clang-format off + const static code_t insCode[] = + { + #define INST(id, nm, info, e1) e1, + #include "instrs.h" + }; + // clang-format on + + code = insCode[ins]; + + assert((code != BAD_CODE)); + + return code; +} + +/**************************************************************************** + * + * Add an instruction with no operands. + */ + +void emitter::emitIns(instruction ins) +{ + // instrDesc* id = emitNewInstrSmall(EA_8BYTE); + instrDesc* id = emitNewInstr(EA_8BYTE); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. + * For referencing a stack-based local variable and a register + * + * Special notes for LoongArch64: + * The parameter `offs` has special info. + * The real value of `offs` is positive. + * If the `offs` is negtive which its real value abs(offs), + * the negtive `offs` is special for optimizing the large offset which >2047. + * when offs >2047 we can't encode one instruction to load/store the data, + * if there are several load/store at this case, you have to repeat the similar + * large offs with reduntant instructions and maybe eat up the `SC_IG_BUFFER_SIZE`. 
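 * (Concrete example of the split performed in the code below: for imm = 0x1900, bit 11
 * is folded into the high part first (0x1900 + 0x800 = 0x2100), lu12i.w materializes
 * 0x2000 into the scratch register, and the final load/store then uses the signed
 * 12-bit displacement -0x700, since 0x2000 - 0x700 == 0x1900.)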
+ * + * Optimize the following: + * lu12i.w x0, 0x0 + * ori x0, x0, 0x9ac + * add.d x0, x0, fp + * fst.s fa0, x0, 0 + * + * For the offs within range [0,0x7ff], using one instruction: + * ori x0, x0, offs + * For the offs within range [0x1000,0xffffffff], using two instruction + * lu12i.w x0, offs-hi-20bits + * ori x0, x0, offs-low-12bits + * + * Store/Load the data: + * fstx.s fa0, x0, fp + * + * If the store/load are repeated, + * addi_d x0,x0,sizeof(type) + * fstx.s fa0, x0, fp + * + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + ssize_t imm; + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_st_b: + case INS_st_h: + + case INS_st_w: + case INS_fst_s: + + case INS_st_d: + case INS_fst_d: + break; + + default: + NYI("emitIns_S_R"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs - 8 : offs; + + if ((-2048 <= imm) && (imm < 2048)) + { + // regs[1] = reg2; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm = imm3 ? imm2 - imm3 : imm2; + + reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idReg2(reg2); + + id->idIns(ins); + + code_t code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (code_t)(imm & 0xfff) << 10; + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/* + * Special notes for `offs`, please see the comment for `emitter::emitIns_S_R`. + */ +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + ssize_t imm; + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_ld_b: + case INS_ld_bu: + + case INS_ld_h: + case INS_ld_hu: + + case INS_ld_w: + case INS_ld_wu: + case INS_fld_s: + + case INS_ld_d: + case INS_fld_d: + + break; + + case INS_lea: + assert(size == EA_8BYTE); + break; + + default: + NYI("emitIns_R_S"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; + + reg1 = (regNumber)((char)reg1 & 0x1f); + code_t code; + if ((-2048 <= imm) && (imm < 2048)) + { + if (ins == INS_lea) + { + ins = INS_addi_d; + } + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (imm & 0xfff) << 10; + } + else + { + if (ins == INS_lea) + { + assert(isValidSimm20(imm >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm >> 12); + ssize_t imm2 = imm & 0xfff; + emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); + + ins = INS_add_d; + code = emitInsCode(ins); + code |= (code_t)reg1; + code |= (code_t)reg2 << 5; + code |= (code_t)REG_RA << 10; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm3 = imm3 ? imm2 - imm3 : imm2; + code = emitInsCode(ins); + code |= (code_t)reg1; + code |= (code_t)REG_RA << 5; + code |= (code_t)(imm3 & 0xfff) << 10; + } + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idIns(ins); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a single immediate value. + */ + +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_b: + case INS_bl: + assert(!(imm & 0x3)); + code |= ((imm >> 18) & 0x3ff); // offs[25:16] + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] + break; + case INS_dbar: + case INS_ibar: + assert((0 <= imm) && (imm <= 0x7fff)); + code |= (imm & 0x7fff); // hint + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs) +{ +#ifdef DEBUG + switch (ins) + { + case INS_bceqz: + case INS_bcnez: + break; + + default: + unreached(); + } +#endif + + code_t code = emitInsCode(ins); + + assert(!(offs & 0x3)); + assert(!(cc >> 3)); + code |= ((cc & 0x7) << 5); // cj + code |= ((offs >> 18) & 0x1f); // offs[20:16] + code |= ((offs >> 2) & 0xffff) << 10; // offs[15:0] + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a constant. 
+ */ + +void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_lu12i_w: + case INS_lu32i_d: + case INS_pcaddi: + case INS_pcalau12i: + case INS_pcaddu12i: + case INS_pcaddu18i: + assert(isGeneralRegister(reg)); + assert((-524288 <= imm) && (imm < 524288)); + + code |= reg; // rd + code |= (imm & 0xfffff) << 5; // si20 + break; + case INS_beqz: + case INS_bnez: + assert(isGeneralRegisterOrR0(reg)); + assert(!(imm & 0x3)); + assert((-1048576 <= (imm >> 2)) && ((imm >> 2) <= 1048575)); + + code |= ((imm >> 18) & 0x1f); // offs[20:16] + code |= reg << 5; // rj + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] + break; + case INS_movfr2cf: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f) << 5; // fj + code |= imm; // cc + break; + case INS_movcf2fr: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f); // fd + code |= imm << 5; // cc + break; + case INS_movgr2cf: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg << 5; // rj + code |= imm; // cc + break; + case INS_movcf2gr: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg; // rd + code |= imm << 5; // cc + break; + default: + unreached(); + break; + } // end switch (ins) + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) +{ + assert(IsMovInstruction(ins)); + + if (!canSkip || (dstReg != srcReg)) + { + if ((EA_4BYTE == attr) && (INS_mov == ins)) + emitIns_R_R_I(INS_slli_w, attr, dstReg, srcReg, 0); + else + emitIns_R_R(ins, attr, dstReg, srcReg); + } +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if (INS_mov == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) + { +#ifdef DEBUG + switch (ins) + { + case INS_ext_w_b: + case INS_ext_w_h: + case INS_clo_w: + case INS_clz_w: + case INS_cto_w: + case INS_ctz_w: + case INS_clo_d: + case INS_clz_d: + case INS_cto_d: + case INS_ctz_d: + case INS_revb_2h: + case INS_revb_4h: + case INS_revb_2w: + case INS_revb_d: + case INS_revh_2w: + case INS_revh_d: + case INS_bitrev_4b: + case INS_bitrev_8b: + case INS_bitrev_w: + case INS_bitrev_d: + case INS_rdtimel_w: + case INS_rdtimeh_w: + case INS_rdtime_d: + case INS_cpucfg: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --1!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + 
assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1 << 5; // rj + code |= reg2 << 10; // rk + } + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fabs_s: + case INS_fabs_d: + case INS_fneg_s: + case INS_fneg_d: + case INS_fsqrt_s: + case INS_fsqrt_d: + case INS_frsqrt_s: + case INS_frsqrt_d: + case INS_frecip_s: + case INS_frecip_d: + case INS_flogb_s: + case INS_flogb_d: + case INS_fclass_s: + case INS_fclass_d: + case INS_fcvt_s_d: + case INS_fcvt_d_s: + case INS_ffint_s_w: + case INS_ffint_s_l: + case INS_ffint_d_w: + case INS_ffint_d_l: + case INS_ftint_w_s: + case INS_ftint_w_d: + case INS_ftint_l_s: + case INS_ftint_l_d: + case INS_ftintrm_w_s: + case INS_ftintrm_w_d: + case INS_ftintrm_l_s: + case INS_ftintrm_l_d: + case INS_ftintrp_w_s: + case INS_ftintrp_w_d: + case INS_ftintrp_l_s: + case INS_ftintrp_l_d: + case INS_ftintrz_w_s: + case INS_ftintrz_w_d: + case INS_ftintrz_l_s: + case INS_ftintrz_l_d: + case INS_ftintrne_w_s: + case INS_ftintrne_w_d: + case INS_ftintrne_l_s: + case INS_ftintrne_l_d: + case INS_frint_s: + case INS_frint_d: + case INS_fmov_s: + case INS_fmov_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --2!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + } + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movgr2frh_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --3!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f); // fd + code |= reg2 << 5; // rj + } + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) + { +#ifdef DEBUG + switch (ins) + { + case INS_movfr2gr_s: + case INS_movfr2gr_d: + case INS_movfrh2gr_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= reg1; // rd + code |= (reg2 & 0x1f) << 5; // fj + } + else if ((INS_dneg == ins) || (INS_neg == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + // sub_d rd, zero, rk + // sub_w rd, zero, rk + code |= reg1; // rd + code |= reg2 << 10; // rk + } + else if (INS_not == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + // nor rd, rj, zero + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and a constant. 
+ */ + +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_slli_w: + case INS_srli_w: + case INS_srai_w: + case INS_rotri_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --1!"); + } +#endif + + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x1f)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x1f) << 10; // ui5 + } + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_slli_d: + case INS_srli_d: + case INS_srai_d: + case INS_rotri_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --2!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x3f)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3f) << 10; // ui6 + } + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { +#ifdef DEBUG + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_lu52i_d: + case INS_slti: + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_ld_d: + case INS_ld_bu: + case INS_ld_hu: + case INS_ld_wu: + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_st_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --3!"); + } + + assert((-2048 <= imm) && (imm <= 2047)); + } + else if (ins == INS_sltui) + { + assert((0 <= imm) && (imm <= 0x7ff)); + } + else + { + switch (ins) + { + case INS_andi: + case INS_ori: + case INS_xori: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --4!"); + } + assert((0 <= imm) && (imm <= 0xfff)); + } +#endif + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 or ui12 + } + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fld_s: + case INS_fld_d: + case INS_fst_s: + case INS_fst_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --5!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-2048 <= imm) && (imm <= 2047)); + + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 + } + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) + { +#ifdef DEBUG + switch (ins) + { + case INS_ldptr_w: + case INS_ldptr_d: + case INS_ll_w: + case INS_ll_d: + case INS_stptr_w: + case INS_stptr_d: + case INS_sc_w: + case INS_sc_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --6!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-8192 <= imm) && (imm <= 8191)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3fff) << 10; // si14 + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { +#ifdef DEBUG + switch (ins) + { + case INS_beq: + case INS_bne: + case INS_blt: + case INS_bltu: + case INS_bge: + case 
INS_bgeu: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --7!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(!(imm & 0x3)); + assert((-32768 <= (imm >> 2)) && ((imm >> 2) <= 32767)); + + code |= reg1 << 5; // rj + code |= reg2; // rd + code |= ((imm >> 2) & 0xffff) << 10; // offs16 + } + else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fcmp_caf_s: + case INS_fcmp_cun_s: + case INS_fcmp_ceq_s: + case INS_fcmp_cueq_s: + case INS_fcmp_clt_s: + case INS_fcmp_cult_s: + case INS_fcmp_cle_s: + case INS_fcmp_cule_s: + case INS_fcmp_cne_s: + case INS_fcmp_cor_s: + case INS_fcmp_cune_s: + case INS_fcmp_saf_d: + case INS_fcmp_sun_d: + case INS_fcmp_seq_d: + case INS_fcmp_sueq_d: + case INS_fcmp_slt_d: + case INS_fcmp_sult_d: + case INS_fcmp_sle_d: + case INS_fcmp_sule_d: + case INS_fcmp_sne_d: + case INS_fcmp_sor_d: + case INS_fcmp_sune_d: + case INS_fcmp_caf_d: + case INS_fcmp_cun_d: + case INS_fcmp_ceq_d: + case INS_fcmp_cueq_d: + case INS_fcmp_clt_d: + case INS_fcmp_cult_d: + case INS_fcmp_cle_d: + case INS_fcmp_cule_d: + case INS_fcmp_cne_d: + case INS_fcmp_cor_d: + case INS_fcmp_cune_d: + case INS_fcmp_saf_s: + case INS_fcmp_sun_s: + case INS_fcmp_seq_s: + case INS_fcmp_sueq_s: + case INS_fcmp_slt_s: + case INS_fcmp_sult_s: + case INS_fcmp_sle_s: + case INS_fcmp_sule_s: + case INS_fcmp_sne_s: + case INS_fcmp_sor_s: + case INS_fcmp_sune_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --8!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f) << 5; // fj + code |= (reg2 & 0x1f) << 10; // fk + code |= imm & 0x7; // cc + } + else if (INS_addu16i_d == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // si16 + } + else if (INS_jirl == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // offs16 + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. 
+* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +*/ +void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) +{ + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); + + bool immFits = true; + +#ifdef DEBUG + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_ld_d: + immFits = isValidSimm12(imm); + break; + + case INS_andi: + case INS_ori: + case INS_xori: + immFits = (0 <= imm) && (imm <= 0xfff); + break; + + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); + } +#endif + + if (immFits) + { + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + assert(!EA_IS_RELOC(attr)); + emitIns_I_la(attr, reg1, imm); + assert(ins == INS_ld_d); + emitIns_R_R_R(INS_ldx_d, attr, reg1, reg2, reg1); + } +} + +/***************************************************************************** + * + * Add an instruction referencing three registers. + */ + +void emitter::emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */ +{ + code_t code = emitInsCode(ins); + + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || + ((INS_stx_b <= ins) && (ins <= INS_stle_d))) + { +#ifdef DEBUG + switch (ins) + { + case INS_add_w: + case INS_add_d: + case INS_sub_w: + case INS_sub_d: + case INS_and: + case INS_or: + case INS_nor: + case INS_xor: + case INS_andn: + case INS_orn: + + case INS_mul_w: + case INS_mul_d: + case INS_mulh_w: + case INS_mulh_wu: + case INS_mulh_d: + case INS_mulh_du: + case INS_mulw_d_w: + case INS_mulw_d_wu: + case INS_div_w: + case INS_div_wu: + case INS_div_d: + case INS_div_du: + case INS_mod_w: + case INS_mod_wu: + case INS_mod_d: + case INS_mod_du: + + case INS_sll_w: + case INS_srl_w: + case INS_sra_w: + case INS_rotr_w: + case INS_sll_d: + case INS_srl_d: + case INS_sra_d: + case INS_rotr_d: + + case INS_maskeqz: + case INS_masknez: + + case INS_slt: + case INS_sltu: + + case INS_ldx_b: + case INS_ldx_h: + case INS_ldx_w: + case INS_ldx_d: + case INS_ldx_bu: + case INS_ldx_hu: + case INS_ldx_wu: + case INS_stx_b: + case INS_stx_h: + case INS_stx_w: + case INS_stx_d: + + case INS_ldgt_b: + case INS_ldgt_h: + case INS_ldgt_w: + case INS_ldgt_d: + case INS_ldle_b: + case INS_ldle_h: + case INS_ldle_w: + case INS_ldle_d: + case INS_stgt_b: + case INS_stgt_h: + case INS_stgt_w: + case INS_stgt_d: + case INS_stle_b: + case INS_stle_h: + case INS_stle_w: + case INS_stle_d: + + case INS_amswap_w: + case INS_amswap_d: + case INS_amswap_db_w: + case INS_amswap_db_d: + case INS_amadd_w: + case INS_amadd_d: + case INS_amadd_db_w: + case INS_amadd_db_d: + case INS_amand_w: + case INS_amand_d: + case INS_amand_db_w: + case INS_amand_db_d: + case INS_amor_w: + case INS_amor_d: + case INS_amor_db_w: + case INS_amor_db_d: + case INS_amxor_w: + case INS_amxor_d: + case INS_amxor_db_w: + case INS_amxor_db_d: + case INS_ammax_w: + case INS_ammax_d: + case INS_ammax_db_w: + case INS_ammax_db_d: + case INS_ammin_w: + case INS_ammin_d: + case INS_ammin_db_w: + case INS_ammin_db_d: + case INS_ammax_wu: + case INS_ammax_du: + case INS_ammax_db_wu: + case INS_ammax_db_du: + case INS_ammin_wu: + case INS_ammin_du: + case INS_ammin_db_wu: + case INS_ammin_db_du: + + case INS_crc_w_b_w: + case INS_crc_w_h_w: + case INS_crc_w_w_w: + case INS_crc_w_d_w: + case INS_crcc_w_b_w: 
+ case INS_crcc_w_h_w: + case INS_crcc_w_w_w: + case INS_crcc_w_d_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --1!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + code |= (reg3 /*& 0x1f*/) << 10; // rk + } + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fadd_s: + case INS_fadd_d: + case INS_fsub_s: + case INS_fsub_d: + case INS_fmul_s: + case INS_fmul_d: + case INS_fdiv_s: + case INS_fdiv_d: + case INS_fmax_s: + case INS_fmax_d: + case INS_fmin_s: + case INS_fmin_d: + case INS_fmaxa_s: + case INS_fmaxa_d: + case INS_fmina_s: + case INS_fmina_d: + case INS_fscaleb_s: + case INS_fscaleb_d: + case INS_fcopysign_s: + case INS_fcopysign_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --2!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + } + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fldx_s: + case INS_fldx_d: + case INS_fstx_s: + case INS_fstx_d: + + case INS_fldgt_s: + case INS_fldgt_d: + case INS_fldle_s: + case INS_fldle_d: + case INS_fstgt_s: + case INS_fstgt_d: + case INS_fstle_s: + case INS_fstle_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --3!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + } + else + { + NYI_LOONGARCH64("Unsupported instruction in emitIns_R_R_R"); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers and a constant. 
+ */ + +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + code_t code = emitInsCode(ins); + + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_alsl_w: + case INS_alsl_wu: + case INS_alsl_d: + case INS_bytepick_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 3)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa2 + } + else if (INS_bytepick_d == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa3 + } + else if (INS_fsel == ins) + { + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= imm << 15; // ca + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and two constants. + */ + +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +{ + code_t code = emitInsCode(ins); + + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + switch (ins) + { + case INS_bstrins_w: + case INS_bstrpick_w: + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 32)); + code |= (imm1 & 0x1f) << 16; // msbw + code |= (imm2 & 0x1f) << 10; // lsbw + break; + case INS_bstrins_d: + case INS_bstrpick_d: + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 64)); + code |= (imm1 & 0x3f) << 16; // msbd + code |= (imm2 & 0x3f) << 10; // lsbd + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing four registers. 
+ */ + +void emitter::emitIns_R_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_fmadd_s: + case INS_fmadd_d: + case INS_fmsub_s: + case INS_fmsub_d: + case INS_fnmadd_s: + case INS_fnmadd_d: + case INS_fnmsub_s: + case INS_fnmsub_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert(isFloatReg(reg4)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (reg4 & 0x1f) << 15; // fa + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * For LOONGARCH64, maybe not the best, here just suports the func-interface. + * + */ +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs)); // can optimize. + // assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + // assert(isGeneralRegister(reg)); while load float the reg is FPR. + + // when id->idIns == bl, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // when id->idIns == load-ins, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // load reg, offs_lo-12bits(reg) #when ins is load ins. + // + // INS_OPTS_RC: ins == bl placeholders. 3-ins: // TODO-LoongArch64: maybe optimize. + // lu12i_w reg, addr-hi-20bits + // ori reg, reg, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // INS_OPTS_RC: ins == load. 3-ins: + // lu12i_w at, offs_hi-20bits //NOTE: offs = (int)(offs_hi<<12) + (int)offs_lo + // lu32i_d at, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(reg) #when ins is load ins. + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idSmallCns(offs); // usually is 0. + id->idInsOpt(INS_OPTS_RC); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(12); // TODO-LoongArch64: maybe optimize. + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + // TODO-LoongArch64: this maybe deleted. + id->idSetIsBound(); // We won't patch address since we will know the exact distance + // once JIT code and data are allocated together. + + assert(addrReg == REG_NA); // NOTE: for LOONGARCH64, not support addrReg != REG_NA. + + id->idAddr()->iiaFieldHnd = fldHnd; + + appendToCurIG(id); +} + +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ + NYI_LOONGARCH64("emitIns_R_AR-----unimplemented/unused on LOONGARCH64 yet----"); +} + +// This computes address from the immediate which is relocatable. 
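A minimal standalone sketch (illustrative, not part of this change) of the hi-20/lo-12 split that the pcaddu12i/addi_d and pcaddu12i/ld_d pairs described above and below rely on; the low 12 bits are consumed as a signed immediate, so bit 11 is rounded into the high part first:

    // Assumes values stay well inside the signed 32-bit range and an arithmetic right shift.
    #include <cassert>
    #include <cstdint>

    static void splitImm32(int32_t disp, int32_t* hi20, int32_t* lo12)
    {
        *hi20 = (disp + 0x800) >> 12;     // high 20 bits, rounded so the low part is signed-12
        *lo12 = disp - (*hi20 << 12);     // remaining signed low 12 bits
    }

    int main()
    {
        const int32_t cases[] = {0x1900, -0x1234, 0x7ffff000};
        for (int32_t disp : cases)
        {
            int32_t hi, lo;
            splitImm32(disp, &hi, &lo);
            assert((-0x800 <= lo) && (lo <= 0x7ff));
            assert(((int64_t)hi << 12) + lo == disp); // the two-instruction pair rebuilds disp
        }
        return 0;
    }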
+void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC + assert(ins == INS_bl); // for special. + assert(isGeneralRegister(reg)); + + // INS_OPTS_RELOC: placeholders. 2-ins: + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ld_d reg, reg, off-lo-12bits + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idInsOpt(INS_OPTS_RELOC); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idCodeSize(8); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ + // TODO-LoongArch64: maybe delete it on future. + NYI_LOONGARCH64("emitSetShortJump-----unimplemented/unused on LOONGARCH64 yet----"); +} + +/***************************************************************************** + * + * Add a label instruction. + */ + +void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + assert(dst->bbFlags & BBF_HAS_LABEL); + + // if for reloc! 4-ins: + // pcaddu12i reg, offset-hi20 + // addi_d reg, reg, offset-lo12 + // + // else: 3-ins: + // lu12i_w reg, dst-hi-20bits + // ori reg, reg, dst-lo-12bits + // bstrins_d reg, zero, msbd, lsbd / lu32i_d reg, 0xff + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsOpt(INS_OPTS_RL); + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(12); + + id->idReg1(reg); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + +#ifdef DEBUG + // Mark the catch return + if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) + { + id->idDebugOnlyInfo()->idCatchRet = true; + } +#endif // DEBUG + + appendToCurIG(id); +} + +void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + NYI_LOONGARCH64("emitIns_J_R-----unimplemented/unused on LOONGARCH64 yet----"); +} + +// NOTE: +// For loongarch64, emitIns_J is just only jump, not include the condition branch! +// The condition branch is the emitIns_J_cond_la(). +// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by +// INS_OPTS_JIRL. +// +// The arg "instrCount" is two regs's encoding when ins is beq/bne/blt/bltu/bge/bgeu/beqz/bnez. +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) +{ + if (dst == nullptr) + { // Now this case not used for loongarch64. + assert(instrCount != 0); + assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. 
+ + assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. + emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. + + return; + } + + // + // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL. + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst + + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + id->idIns(ins); + id->idReg1((regNumber)(instrCount & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); + + id->idInsOpt(INS_OPTS_J); + emitCounts_INS_OPTS_J++; + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + } + + id->idjShort = false; + + // TODO-LoongArch64: maybe deleted this. + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +// NOTE: +// For loongarch64, emitIns_J_cond_la() is the condition branch. +// NOTE: Only supported short branch so far !!! +// +void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) +{ + // TODO-LoongArch64: + // Now the emitIns_J_cond_la() is only the short condition branch. + // There is no long condition branch for loongarch64 so far. + // For loongarch64, the long condition branch is like this: + // ---> branch_condition condition_target; //here is the condition branch, short branch is enough. + // ---> jump jump_target; (this supporting the long jump.) + // condition_target: + // ... + // ... + // jump_target: + // + // + // INS_OPTS_J_cond: placeholders. 1-ins. 
+ // ins reg1, reg2, dst + + assert(dst != nullptr); + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idjShort = false; + + id->idInsOpt(INS_OPTS_J_cond); + id->idAddr()->iiaBBlabel = dst; + + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) +{ + assert(!EA_IS_RELOC(size)); + assert(isGeneralRegister(reg)); + // size = EA_SIZE(size); + + if (-1 == (imm >> 11) || 0 == (imm >> 11)) + { + emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm); + return; + } + + if (0 == (imm >> 12)) + { + emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm); + return; + } + + instrDesc* id = emitNewInstr(size); + + if ((imm == INT64_MAX) || (imm == 0xffffffff)) + { + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32); + id->idCodeSize(8); + } + else if (-1 == (imm >> 31) || 0 == (imm >> 31)) + { + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + + id->idCodeSize(8); + } + else if (-1 == (imm >> 51) || 0 == (imm >> 51)) + { + // low-32bits. + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // + // high-20bits. + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + + id->idCodeSize(12); + } + else + { // 0xffff ffff ffff ffff. + // low-32bits. + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // + // high-32bits. + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); + + id->idCodeSize(16); + } + + id->idIns(INS_lu12i_w); + id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); + + id->idInsOpt(INS_OPTS_I); + + id->idAddr()->iiaAddr = (BYTE*)imm; + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a call instruction (direct or indirect). + * argSize<0 means that the caller will pop the arguments + * + * The other arguments are interpreted depending on callType as shown: + * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. + * + * EC_FUNC_TOKEN : addr is the method address + * + * If callType is one of these emitCallTypes, addr has to be NULL. + * EC_INDIR_R : "call ireg". + * + * For LOONGARCH xreg, xmul and disp are never used and should always be 0/REG_NA. + * + * Please consult the "debugger team notification" comment in genFnProlog(). 
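 *
 * As an illustrative (hypothetical, simplified) direct-call use - the argument values are
 * placeholders for what codegen already has at the call site, and the optional trailing
 * parameters plus any second-return-size argument are left at their defaults:
 *
 *   emitIns_Call(EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo)
 *                (void*)addr, 0, EA_PTRSIZE, ptrVars, gcrefRegs, byrefRegs);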
+ */ + +void emitter::emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di /* = DebugInfo() */, + regNumber ireg /* = REG_NA */, + regNumber xreg /* = REG_NA */, + unsigned xmul /* = 0 */, + ssize_t disp /* = 0 */, + bool isJump /* = false */) +{ + /* Sanity check the arguments depending on callType */ + + assert(callType < EC_COUNT); + assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert(callType < EC_INDIR_R || addr == NULL); + assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); + + // LoongArch64 never uses these + assert(xreg == REG_NA && xmul == 0 && disp == 0); + + // Our stack level should be always greater than the bytes of arguments we push. Just + // a sanity test. + assert((unsigned)abs(argSize) <= codeGen->genStackLevel); + + // Trim out any callee-trashed registers from the live set. + regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + gcrefRegs &= savedSet; + byrefRegs &= savedSet; + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); + dumpConvertedVarSet(emitComp, ptrVars); + printf(", gcrefRegs="); + printRegMaskInt(gcrefRegs); + emitDispRegSet(gcrefRegs); + printf(", byrefRegs="); + printRegMaskInt(byrefRegs); + emitDispRegSet(byrefRegs); + printf("\n"); + } +#endif + + /* Managed RetVal: emit sequence point for the call */ + if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + { + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + } + + /* + We need to allocate the appropriate instruction descriptor based + on whether this is a direct/indirect call, and whether we need to + record an updated set of live GC variables. + */ + instrDesc* id; + + assert(argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + + if (callType >= EC_INDIR_R) + { + /* Indirect call, virtual calls */ + + assert(callType == EC_INDIR_R); + + id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + else + { + /* Helper/static/nonvirtual/function calls (direct or through handle), + and calls to an absolute addr. */ + + assert(callType == EC_FUNC_TOKEN); + + id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + + /* Update the emitter's live GC ref sets */ + + VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + emitThisGCrefRegs = gcrefRegs; + emitThisByrefRegs = byrefRegs; + + id->idSetIsNoGC(emitNoGChelper(methHnd)); + + /* Set the instruction - special case jumping a function */ + instruction ins; + + ins = INS_jirl; // jirl t2 + id->idIns(ins); + + id->idInsOpt(INS_OPTS_C); + // TODO-LoongArch64: maybe optimize. + + // INS_OPTS_C: placeholders. 1/2/4-ins: + // if (callType == EC_INDIR_R) + // jirl REG_R0/REG_RA, ireg, 0 <---- 1-ins + // else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR) + // if reloc: + // //pc + offset_38bits # only when reloc. 
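+    //      (pcaddu18i adds (hi20 << 18) to the PC and jirl adds the low 18 bits,
+    //       i.e. a signed 38-bit, roughly +/-128GB, pc-relative reach)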
+ // pcaddu18i t2, addr-hi20 + // jilr r0/1,t2,addr-lo18 + // + // else: + // lu12i_w t2, dst_offset_lo32-hi + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl REG_R0/REG_RA, t2, 0 + + /* Record the address: method, indirection, or funcptr */ + if (callType == EC_INDIR_R) + { + /* This is an indirect call (either a virtual call or func ptr call) */ + // assert(callType == EC_INDIR_R); + + id->idSetIsCallRegPtr(); + + regNumber reg_jirl = isJump ? REG_R0 : REG_RA; + id->idReg4(reg_jirl); + id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. + assert(xreg == REG_NA); + + id->idCodeSize(4); + } + else + { + /* This is a simple direct call: "call helper/method/addr" */ + + assert(callType == EC_FUNC_TOKEN); + assert(addr != NULL); + assert((((size_t)addr) & 3) == 0); + + addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + id->idAddr()->iiaAddr = (BYTE*)addr; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + { + id->idCodeSize(16); + } + } + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + if (id->idIsLargeCall()) + { + printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, + VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); + } + } + + id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + id->idDebugOnlyInfo()->idCallSig = sigInfo; +#endif // DEBUG + +#ifdef LATE_DISASM + if (addr != nullptr) + { + codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + } +#endif // LATE_DISASM + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Output a call instruction. + */ + +unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) +{ + unsigned char callInstrSize = sizeof(code_t); // 4 bytes + regMaskTP gcrefRegs; + regMaskTP byrefRegs; + + VARSET_TP GCvars(VarSetOps::UninitVal()); + + // Is this a "fat" call descriptor? + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + gcrefRegs = idCall->idcGcrefRegs; + byrefRegs = idCall->idcByrefRegs; + VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + + gcrefRegs = emitDecodeCallGCregs(id); + byrefRegs = 0; + VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); + } + + /* We update the GC info before the call as the variables cannot be + used by the call. Killing variables before the call helps with + boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. + If we ever track aliased variables (which could be used by the + call), we would have to keep them alive past the call. */ + + emitUpdateLiveGCvars(GCvars, dst); +#ifdef DEBUG + // NOTEADD: + // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. + if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCVarDelta(); // define in emit.cpp + } +#endif // DEBUG + + assert(id->idIns() == INS_jirl); + if (id->idIsCallRegPtr()) + { // EC_INDIR_R + code = emitInsCode(id->idIns()); + code |= (code_t)id->idReg4(); + code |= (code_t)id->idReg3() << 5; + // the offset default is 0; + *(code_t*)dst = code; + } + else if (id->idIsReloc()) + { + // pc + offset_38bits + // + // pcaddu18i t2, addr-hi20 + // jilr r0/1,t2,addr-lo18 + + *(code_t*)dst = 0x1e00000e; + + size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. 
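+
+        // Bit 0 of the stashed address is the link-register selector that emitIns_Call
+        // folded in (1: link through RA for an ordinary call, 0: R0 for a tail jump);
+        // it is removed again before forming the pc-relative offset below.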
+ + int reg2 = (int)addr & 1; + addr = addr ^ 1; + + assert(isValidSimm38(addr - (ssize_t)dst)); + assert((addr & 3) == 0); + + dst += 4; +#ifdef DEBUG + code = emitInsCode(INS_pcaddu18i); + assert((code | (14)) == 0x1e00000e); + assert((int)REG_T2 == 14); + code = emitInsCode(INS_jirl); + assert(code == 0x4c000000); +#endif + *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; + + emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_LOONGARCH64_JIR); + } + else + { + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 + + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + assert((imm >> 32) == 0xff); + + int reg2 = (int)(imm & 1); + imm -= reg2; + + code = emitInsCode(INS_lu12i_w); + code |= (code_t)REG_T2; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)REG_T2; + code |= (code_t)REG_T2 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)REG_T2; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_jirl); + code |= (code_t)reg2; + code |= (code_t)REG_T2 << 5; + // the offset default is 0; + *(code_t*)dst = code; + } + + dst += 4; + + // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } + + // If the method returns a GC ref, mark INTRET (A0) appropriately. + if (id->idGCref() == GCT_GCREF) + { + gcrefRegs = emitThisGCrefRegs | RBM_INTRET; + } + else if (id->idGCref() == GCT_BYREF) + { + byrefRegs = emitThisByrefRegs | RBM_INTRET; + } + + // If is a multi-register return method is called, mark INTRET_1 (A1) appropriately + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + if (idCall->idSecondGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET_1; + } + else if (idCall->idSecondGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET_1; + } + } + + // If the GC register set has changed, report the new set. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + // If the Byref register set has changed, report the new set. + if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } + + // Some helper calls may be marked as not requiring GC info to be recorded. + if (!id->idIsNoGC()) + { + // On LOONGARCH64, as on AMD64, we don't change the stack pointer to push/pop args. + // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism + // to record the call for GC info purposes. (It might be best to use an alternate call, + // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.) + emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0); + + // Do we need to record a call location for GC purposes? + // + if (!emitFullGCinfo) + { + emitRecordGCcall(dst, callInstrSize); + } + } + if (id->idIsCallRegPtr()) + { + callInstrSize = 1 << 2; + } + else + { + callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins. 
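+        // i.e. 8 bytes for the pcaddu18i+jirl reloc form, 16 bytes for the
+        // lu12i.w/ori/lu32i.d/jirl absolute form (see emitIns_Call).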
+ } + + return callInstrSize; +} + +//---------------------------------------------------------------------------------- +// LoongArch64 has an individual implementation for emitJumpDistBind(). +// +// Bind targets of relative jumps/branch to choose the smallest possible encoding. +// LoongArch64 has a small medium, and large encoding. +// +// Even though the small encoding is offset-18bits which lowest 2bits is always 0. +// The small encoding as the default is fit for most cases. +// + +void emitter::emitJumpDistBind() +{ +#ifdef DEBUG + if (emitComp->verbose) + { + printf("*************** In emitJumpDistBind()\n"); + } + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction list before jump distance binding:\n\n"); + emitDispIGlist(true); + } +#endif + + instrDescJmp* jmp; + + UNATIVE_OFFSET adjIG; + UNATIVE_OFFSET adjSJ; + insGroup* lstIG; +#ifdef DEBUG + insGroup* prologIG = emitPrologIG; +#endif // DEBUG + + // NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating the size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + +/*****************************************************************************/ +/* If the default small encoding is not enough, we start again here. */ +/*****************************************************************************/ + +AGAIN: + +#ifdef DEBUG + emitCheckIGoffsets(); +#endif + +#ifdef DEBUG + insGroup* lastIG = nullptr; + instrDescJmp* lastSJ = nullptr; +#endif + + lstIG = nullptr; + adjSJ = 0; + adjIG = 0; + + for (jmp = emitJumpList; jmp; jmp = jmp->idjNext) + { + insGroup* jmpIG; + insGroup* tgtIG; + + UNATIVE_OFFSET jsz; // size of the jump instruction in bytes + + NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset? + UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump + UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative + // offset of the jump + UNATIVE_OFFSET dstOffs; + NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded + +/* Make sure the jumps are properly ordered */ + +#ifdef DEBUG + assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ)); + lastSJ = (lastIG == jmp->idjIG) ? jmp : nullptr; + + assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || + emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow + lastIG = jmp->idjIG; +#endif // DEBUG + + /* Get hold of the current jump size */ + + jsz = jmp->idCodeSize(); + + /* Get the group the jump is in */ + + jmpIG = jmp->idjIG; + + /* Are we in a group different from the previous jump? */ + + if (lstIG != jmpIG) + { + /* Were there any jumps before this one? 
*/ + + if (lstIG) + { + /* Adjust the offsets of the intervening blocks */ + + do + { + lstIG = lstIG->igNext; + assert(lstIG); +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + lstIG->igOffs += adjIG; + assert(IsCodeAligned(lstIG->igOffs)); + } while (lstIG != jmpIG); + } + + /* We've got the first jump in a new group */ + adjSJ = 0; + lstIG = jmpIG; + } + + /* Apply any local size adjustment to the jump's relative offset */ + jmp->idjOffs += adjSJ; + + // If this is a jump via register, the instruction size does not change, so we are done. + CLANG_FORMAT_COMMENT_ANCHOR; + + /* Have we bound this jump's target already? */ + + if (jmp->idIsBound()) + { + /* Does the jump already have the smallest size? */ + + if (jmp->idjShort) + { + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + continue; + } + + tgtIG = jmp->idAddr()->iiaIGlabel; + } + else + { + /* First time we've seen this label, convert its target */ + CLANG_FORMAT_COMMENT_ANCHOR; + + tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + +#ifdef DEBUG + if (EMITVERBOSE) + { + if (tgtIG) + { + printf(" to %s\n", emitLabelString(tgtIG)); + } + else + { + printf("-- ERROR, no emitter cookie for " FMT_BB "; it is probably missing BBF_HAS_LABEL.\n", + jmp->idAddr()->iiaBBlabel->bbNum); + } + } + assert(tgtIG); +#endif // DEBUG + + /* Record the bound target */ + + jmp->idAddr()->iiaIGlabel = tgtIG; + jmp->idSetIsBound(); + } + + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + /* + In the following distance calculations, if we're not actually + scheduling the code (i.e. reordering instructions), we can + use the actual offset of the jump (rather than the beg/end of + the instruction group) since the jump will not be moved around + and thus its offset is accurate. + + First we need to figure out whether this jump is a forward or + backward one; to do this we simply look at the ordinals of the + group that contains the jump and the target. + */ + + srcInstrOffs = jmpIG->igOffs + jmp->idjOffs; + + /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */ + dstOffs = tgtIG->igOffs; + + srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch + + if (jmpIG->igNum < tgtIG->igNum) + { + /* Forward jump */ + + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. + */ + + dstOffs += adjIG; + + /* Compute the distance estimate */ + + jmpDist = dstOffs - srcEncodingOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist - psd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[1] Jump block is at %08X\n", jmpIG->igOffs); + printf("[1] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[1] Jump source is at %08X\n", srcEncodingOffs); + printf("[1] Label block is at %08X\n", dstOffs); + printf("[1] Jump dist. 
is %04X\n", jmpDist); + if (extra > 0) + { + printf("[1] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + else + { + /* Backward jump */ + + /* Compute the distance estimate */ + + jmpDist = srcEncodingOffs - dstOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist + nsd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[2] Jump block is at %08X\n", jmpIG->igOffs); + printf("[2] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[2] Jump source is at %08X\n", srcEncodingOffs); + printf("[2] Label block is at %08X\n", dstOffs); + printf("[2] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[2] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. 
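+                // beqz/bnez carry a 21-bit (word) displacement, so they reach farther than the
+                // 16-bit two-register compare branches handled above.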
+ { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + } // end for each jump + + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + // indicating the instrDescJmp's size of the type INS_OPTS_J had updated + // after the first round and should iterate again to update. + isLinkingEnd_LA = 0x2; + + // Adjust offsets of any remaining blocks. + for (; lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nLabels list after the jump dist binding:\n\n"); + emitDispIGlist(false); + } + + emitCheckIGoffsets(); +#endif // DEBUG +} + +/***************************************************************************** + * + * Emit a 32-bit LOONGARCH64 instruction + */ + +/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +{ + assert(sizeof(code_t) == 4); + BYTE* dstRW = dst + writeableOffset; + *((code_t*)dstRW) = code; + + return sizeof(code_t); +} + +/***************************************************************************** +* + * Append the machine code corresponding to the given instruction descriptor + * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' + * is the instruction group that contains the instruction. Updates '*dp' to + * point past the generated code, and returns the size of the instruction + * descriptor in bytes. + */ + +size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) +{ + BYTE* dst = *dp; + BYTE* dst2 = dst; // addr for updating gc info if needed. 
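+    // dst2 records the code address that GC liveness updates for this instruction are
+    // attributed to; for multi-instruction pseudo-ops it can point at an intermediate
+    // instruction rather than the end of the emitted group.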
+ code_t code = 0; + instruction ins; + size_t sz; // = emitSizeOfInsDsc(id); + +#ifdef DEBUG +#if DUMP_GC_TABLES + bool dspOffs = emitComp->opts.dspGCtbls; +#else + bool dspOffs = !emitComp->opts.disDiffable; +#endif +#endif // DEBUG + + assert(REG_NA == (int)REG_NA); + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_RELOC: + { + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ld_d reg, reg, off-lo-12bits + + regNumber reg1 = id->idReg1(); + + *(code_t*)dst = 0x1c000000 | (code_t)reg1; + + dst2 = dst; + dst += 4; + +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); + code = emitInsCode(INS_ld_d); + assert(code == 0x28c00000); +#endif + + if (id->idIsCnsReloc()) + { + ins = INS_addi_d; + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); + } + else + { + assert(id->idIsDspReloc()); + ins = INS_ld_d; + *(code_t*)dst = 0x28c00000 | (code_t)reg1 | (code_t)(reg1 << 5); + } + + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), reg1, dst); + } + else + { + emitGCregDeadUpd(reg1, dst); + } + + dst += 4; + + emitRecordRelocation(dst2, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); + + dst2 += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_I: + { + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + regNumber reg1 = id->idReg1(); + dst2 += 4; + + switch (id->idCodeSize()) + { + case 8: + { + if (id->idReg2()) + { // special for INT64_MAX or UINT32_MAX; + code = emitInsCode(INS_addi_d); + code |= (code_t)reg1; + code |= (code_t)REG_R0; + code |= 0xfff << 10; + + *(code_t*)dst = code; + dst += 4; + + ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; + code = emitInsCode(INS_srli_d); + code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); + *(code_t*)dst = code; + } + else + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + } + break; + } + case 12: + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 32) & 0xfffff) << 5; + + *(code_t*)dst = code; + + break; + } + case 16: + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= (code_t)((imm >> 32) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu52i_d); + code |= (code_t)reg1; + code |= (code_t)(reg1) << 5; + code |= ((code_t)(imm >> 52) & 0xfff) << 10; + + *(code_t*)dst = code; + + break; + } + default: + unreached(); + break; + } + + ins = INS_ori; + dst += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_RC: + { + // Reference to JIT data + + // when id->idIns == bl, for reloc! + // pcaddu12i r21, off-hi-20bits + // addi_d reg, r21, off-lo-12bits + // when id->idIns == load-ins + // pcaddu12i r21, off-hi-20bits + // load reg, offs_lo-12bits(r21) #when ins is load ins. + // + // when id->idIns == bl + // lu12i_w r21, addr-hi-20bits + // ori reg, r21, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // when id->idIns == load-ins + // lu12i_w r21, offs_hi-20bits + // lu32i_d r21, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(r21) + assert(id->idAddr()->iiaIsJitDataOffset()); + assert(id->idGCref() == GCT_NONE); + + int doff = id->idAddr()->iiaGetJitDataOffset(); + assert(doff >= 0); + + ssize_t imm = emitGetInsSC(id); + assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0. + + unsigned dataOffs = (unsigned)(doff + imm); + + assert(dataOffs < emitDataSize()); + + ins = id->idIns(); + regNumber reg1 = id->idReg1(); + + if (id->idIsReloc()) + { + // get the addr-offset of the data. + imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs; + assert(imm > 0); + assert(!(imm & 3)); + + doff = (int)(imm & 0x800); + imm += doff; + assert(isValidSimm20(imm >> 12)); + + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. 
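+
+                // The 0x800 round-up compensates for the sign-extension of the lo-12 immediate:
+                // when bit 11 of the offset is set, the hi-20 part is bumped by one and the lo-12
+                // part becomes negative, so pcaddu12i plus the following addi.d/load still sum to
+                // the original offset.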
+ +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); +#endif + code = 0x1c000000 | 21; + *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7); + dst += 4; + + if (ins == INS_bl) + { + assert(isGeneralRegister(reg1)); + ins = INS_addi_d; +#ifdef DEBUG + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); +#endif + code = 0x02c00000 | (21 << 5); + *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); + } + else + { + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; // NOTE:here must be REG_R21 !!! + code |= (code_t)(doff & 0xfff) << 10; + *(code_t*)dst = code; + } + dst += 4; + dst2 = dst; + } + else + { + // get the addr of the data. + imm = (ssize_t)emitConsBlock + dataOffs; + + code = emitInsCode(INS_lu12i_w); + if (ins == INS_bl) + { + assert((imm >> 32) == 0xff); + + doff = (int)imm >> 12; + code |= (code_t)REG_R21; + code |= ((code_t)doff & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + + ins = INS_lu32i_d; + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + } + else + { + doff = (int)(imm & 0x800); + imm += doff; + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. + + assert((imm >> 32) == 0xff); + + dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. + code |= (code_t)REG_R21; + code |= ((code_t)dataOffs & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)REG_R21; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; + code |= (code_t)(doff & 0xfff) << 10; + + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + } + } + + sz = sizeof(instrDesc); + } + break; + + case INS_OPTS_RL: + { + // if for reloc! + // pcaddu12i reg, offset-hi20 + // addi_d reg, reg, offset-lo12 + // + // else: // TODO-LoongArch64:optimize. + // lu12i_w reg, dst-hi-12bits + // ori reg, reg, dst-lo-12bits + // lu32i_d reg, dst-hi-32bits + + insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); + id->idAddr()->iiaIGlabel = tgtIG; + + regNumber reg1 = id->idReg1(); + assert(isGeneralRegister(reg1)); + + if (id->idIsReloc()) + { + ssize_t imm = (ssize_t)tgtIG->igOffs; + imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; + assert((imm & 3) == 0); + + int doff = (int)(imm & 0x800); + imm += doff; + assert(isValidSimm20(imm >> 12)); + + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. 
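+
+                // Same hi-20/lo-12 split (with the 0x800 round-up for the sign-extended lo-12)
+                // as the INS_OPTS_RC reloc case above.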
+ + code = 0x1c000000; + *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7); + dst += 4; + dst2 = dst; +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); +#endif + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10); + ins = INS_addi_d; + } + else + { + ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; + assert((imm >> 32) == 0xff); + + code = emitInsCode(INS_lu12i_w); + code |= (code_t)REG_R21; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + + ins = INS_lu32i_d; + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= 0xff << 5; + + *(code_t*)dst = code; + } + + dst += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_JIRL: + // case_1: <----------from INS_OPTS_J: + // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next + // bnez/beqz dst | b dst | b dst + //_next: + // + // case_2: <---------- TODO-LoongArch64: from INS_OPTS_J: + // bnez/beqz _next: + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + //_next: + // + // case_3: <----------INS_OPTS_JIRL: //not used by now !!! + // b dst + // + // case_4: <----------INS_OPTS_JIRL: //not used by now !!! + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + // + { + instrDescJmp* jmp = (instrDescJmp*)id; + + regNumber reg1 = id->idReg1(); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + imm -= 4; + + assert((imm & 0x3) == 0); + + ins = jmp->idIns(); + assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! + switch (jmp->idCodeSize()) + { + case 8: + { + regNumber reg2 = id->idReg2(); + assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); + + if ((INS_beq == ins) || (INS_bne == ins)) + { + if ((-0x400000 <= imm) && (imm < 0x400000)) + { + code = emitInsCode(INS_xor); + code |= (code_t)REG_R21; + code |= (code_t)reg1 << 5; + code |= (code_t)reg2 << 10; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins == INS_beq ? 
INS_beqz : INS_bnez); + code |= (code_t)REG_R21 << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + + *(code_t*)dst = code; + dst += 4; + } + else + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bne & 0xfffe) == INS_beq); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + } + else if ((INS_bceqz == ins) || (INS_bcnez == ins)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bcnez & 0xfffe) == INS_bceqz); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)reg1) << 5; + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + else if ((INS_blt <= ins) && (ins <= INS_bgeu)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bge & 0xfffe) == INS_blt); + assert((INS_bgeu & 0xfffe) == INS_bltu); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + break; + } + + default: + unreached(); + break; + } + } + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J_cond: + // b_cond dst-relative. + // + // NOTE: + // the case "imm > 0x7fff" not supported. + // More info within the emitter::emitIns_J_cond_la(); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); + assert(!(imm & 3)); + + ins = id->idIns(); + code = emitInsCode(ins); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J: + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. 
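+
+            // The displacement is encoded in 4-byte units (imm >> 2): bits 25..10 hold offs[15:0];
+            // b/bl additionally place offs[25:16] in bits 9..0, while beqz/bnez/bceqz/bcnez place
+            // offs[20:16] in bits 4..0.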
+ assert((imm & 3) == 0); + + ins = id->idIns(); + code = emitInsCode(ins); + if (ins == INS_b || ins == INS_bl) + { + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + } + else if (ins == INS_bnez || ins == INS_beqz) + { + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + } + else if (ins == INS_bcnez || ins == INS_bceqz) + { + assert((code_t)(id->idReg1()) < 8); // cc + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } + break; + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + sz = sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + sz = sizeof(instrDesc); + } + dst += emitOutputCall(ig, dst, id, 0); + ins = INS_nop; + break; + + // case INS_OPTS_NONE: + default: + *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); + dst += 4; + dst2 = dst; + ins = id->idIns(); + sz = emitSizeOfInsDsc(id); + break; + } + + // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. + // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a + // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as + // for stores, but we ignore those cases here.) + if (emitInsMayWriteToGCReg(ins)) // True if "id->idIns()" writes to a register than can hold GC ref. + { + // We assume that "idReg1" is the primary destination register for all instructions + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2); + } + else + { + emitGCregDeadUpd(id->idReg1(), dst2); + } + } + + // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC + // ref or overwritten one. + if (emitInsWritesToLclVarStackLoc(id) /*|| emitInsWritesToLclVarStackLocPair(id)*/) + { + int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); + unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); + bool FPbased; + int adr = emitComp->lvaFrameAddress(varNum, &FPbased); + if (id->idGCref() != GCT_NONE) + { + emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. + vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); + } + // if (emitInsWritesToLclVarStackLocPair(id)) + //{ + // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; + // if (id->idGCrefReg2() != GCT_NONE) + // { + // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp); + // } + // else + // { + // // If the type of the local is a gc ref type, update the liveness. + // var_types vt; + // if (varNum >= 0) + // { + // // "Regular" (non-spill-temp) local. 
+ // vt = var_types(emitComp->lvaTable[varNum].lvType); + // } + // else + // { + // TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + // vt = tmpDsc->tdTempType(); + // } + // if (vt == TYP_REF || vt == TYP_BYREF) + // emitGCvarDeadUpd(adr + ofs2, *dp); + // } + //} + } + +#ifdef DEBUG + /* Make sure we set the instruction descriptor size correctly */ + + // size_t expected = emitSizeOfInsDsc(id); + // assert(sz == expected); + + if (emitComp->opts.disAsm || emitComp->verbose) + { + code_t* cp = (code_t*)*dp; + while ((BYTE*)cp != dst) + { + emitDisInsName(*cp, (BYTE*)cp, id); + cp++; + } + } + + if (emitComp->compDebugBreak) + { + // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for + // emitting instruction a6, (i.e. IN00a6 in jitdump). + if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != dst); + + *dp = dst; + + return sz; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +#ifdef DEBUG + +// clang-format off +static const char* const RegNames[] = +{ + #define REGDEF(name, rnum, mask, sname) sname, + #include "register.h" +}; +// clang-format on + +//---------------------------------------------------------------------------------------- +// Disassemble the given instruction. +// The `emitter::emitDisInsName` is focused on the most important for debugging. +// So it implemented as far as simply and independently which is very useful for +// porting easily to the release mode. +// +// Arguments: +// code - The instruction's encoding. +// addr - The address of the code. +// id - The instrDesc of the code if needed. +// +// Note: +// The length of the instruction's name include aligned space is 13. 
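+//  Unrecognized encodings are reported as "LOONGARCH illegal instruction: <code>".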
+// + +void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) +{ + const BYTE* insAdr = addr; + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; + + unsigned int opcode = (code >> 26) & 0x3f; + + bool disOpcode = !emitComp->opts.disDiffable; + bool disAddr = emitComp->opts.disAddr; + if (disAddr) + { + printf(" 0x%llx", insAdr); + } + + printf(" "); + + if (disOpcode) + { + printf("%08X ", code); + } + + // bits: 31-26,MSB6 + switch (opcode) + { + case 0x0: + { + goto Label_OPCODE_0; + } + case 0x2: + { + goto Label_OPCODE_2; + } + case 0x3: + { + goto Label_OPCODE_3; + } + case 0xe: + { + goto Label_OPCODE_E; + } + case LA_2RI16_ADDU16I_D: // 0x4 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; + printf("addu16i.d %s, %s, %d\n", rd, rj, si16); + return; + } + case 0x5: + case 0x6: + case 0x7: + { + // bits: 31-25,MSB7 + unsigned int inscode = (code >> 25) & 0x7f; + const char* rd = RegNames[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; + switch (inscode) + { + case LA_1RI20_LU12I_W: + printf("lu12i.w %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_LU32I_D: + printf("lu32i.d %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDI: + printf("pcaddi %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCALAU12I: + printf("pcalau12i %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDU12I: + printf("pcaddu12i %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDU18I: + { + printf("pcaddu18i %s, 0x%x\n", rd, si20); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x8: + case 0x9: + { + // bits: 31-24,MSB8 + unsigned int inscode = (code >> 24) & 0xff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff) << 2; + si14 >>= 2; + switch (inscode) + { + case LA_2RI14_LL_W: + printf("ll.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_SC_W: + printf("sc.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LL_D: + printf("ll.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_SC_D: + printf("sc.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LDPTR_W: + printf("ldptr.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_STPTR_W: + printf("stptr.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LDPTR_D: + printf("ldptr.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_STPTR_D: + printf("stptr.d %s, %s, %d\n", rd, rj, si14); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0xa: + { + // bits: 31-24,MSB8 + unsigned int inscode = (code >> 22) & 0x3ff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + switch (inscode) + { + case LA_2RI12_LD_B: + printf("ld.b %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_H: + printf("ld.h %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_W: + printf("ld.w %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_D: + printf("ld.d %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_B: + printf("st.b %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_H: + printf("st.h %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_W: + printf("st.w %s, %s, %d\n", rd, rj, 
si12); + return; + case LA_2RI12_ST_D: + printf("st.d %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_BU: + printf("ld.bu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_HU: + printf("ld.hu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_WU: + printf("ld.wu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_PRELD: + NYI_LOONGARCH64("unused instr LA_2RI12_PRELD"); + return; + case LA_2RI12_FLD_S: + printf("fld.s %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FST_S: + printf("fst.s %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FLD_D: + printf("fld.d %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FST_D: + printf("fst.d %s, %s, %d\n", fd, rj, si12); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_1RI21_BEQZ: // 0x10 + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + printf("beqz %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); + return; + } + case LA_1RI21_BNEZ: // 0x11 + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + printf("bnez %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); + return; + } + case 0x12: + { + // LA_1RI21_BCEQZ + // LA_1RI21_BCNEZ + const char* cj = CFregName[(code >> 5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + if (0 == ((code >> 8) & 0x3)) + { + printf("bceqz %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); + return; + } + else if (1 == ((code >> 8) & 0x3)) + { + printf("bcnez %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2RI16_JIRL: // 0x13 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + if (id->idDebugOnlyInfo()->idMemCookie) + { + assert(0 < id->idDebugOnlyInfo()->idMemCookie); + const char* methodName; + methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); + printf("jirl %s, %s, %d #%s\n", rd, rj, offs16, methodName); + } + else + { + printf("jirl %s, %s, %d\n", rd, rj, offs16); + } + return; + } + case LA_I26_B: // 0x14 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; + offs26 >>= 4; + printf("b 0x%llx\n", (int64_t)insAdr + offs26); + return; + } + case LA_I26_BL: // 0x15 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; + offs26 >>= 4; + printf("bl 0x%llx\n", (int64_t)insAdr + offs26); + return; + } + case LA_2RI16_BEQ: // 0x16 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("beq %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BNE: // 0x17 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bne %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BLT: // 0x18 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("blt %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + 
return; + } + case LA_2RI16_BGE: // 0x19 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bge %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BLTU: // 0x1a + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bltu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BGEU: // 0x1b + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bgeu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_0: + opcode = (code >> 22) & 0x3ff; + + // bits: 31-22,MSB10 + switch (opcode) + { + case 0x0: + { + // bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x0: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + switch (inscode2) + { + case 0x0: + { + // bits:31-10,MSB22 + unsigned int inscode3 = (code >> 10) & 0x3fffff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + switch (inscode3) + { + case LA_2R_CLO_W: + printf("clo.w %s, %s\n", rd, rj); + return; + case LA_2R_CLZ_W: + printf("clz.w %s, %s\n", rd, rj); + return; + case LA_2R_CTO_W: + printf("cto.w %s, %s\n", rd, rj); + return; + case LA_2R_CTZ_W: + printf("ctz.w %s, %s\n", rd, rj); + return; + case LA_2R_CLO_D: + printf("clo.d %s, %s\n", rd, rj); + return; + case LA_2R_CLZ_D: + printf("clz.d %s, %s\n", rd, rj); + return; + case LA_2R_CTO_D: + printf("cto.d %s, %s\n", rd, rj); + return; + case LA_2R_CTZ_D: + printf("ctz.d %s, %s\n", rd, rj); + return; + case LA_2R_REVB_2H: + printf("revb.2h %s, %s\n", rd, rj); + return; + case LA_2R_REVB_4H: + printf("revb.4h %s, %s\n", rd, rj); + return; + case LA_2R_REVB_2W: + printf("revb.2w %s, %s\n", rd, rj); + return; + case LA_2R_REVB_D: + printf("revb.d %s, %s\n", rd, rj); + return; + case LA_2R_REVH_2W: + printf("revh.2w %s, %s\n", rd, rj); + return; + case LA_2R_REVH_D: + printf("revh.d %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_4B: + printf("bitrev.4b %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_8B: + printf("bitrev.8b %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_W: + printf("bitrev.w %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_D: + printf("bitrev.d %s, %s\n", rd, rj); + return; + case LA_2R_EXT_W_H: + printf("ext.w.h %s, %s\n", rd, rj); + return; + case LA_2R_EXT_W_B: + printf("ext.w.b %s, %s\n", rd, rj); + return; + case LA_2R_RDTIMEL_W: + printf("rdtimel.w %s, %s\n", rd, rj); + return; + case LA_2R_RDTIMEH_W: + printf("rdtimeh.w %s, %s\n", rd, rj); + return; + case LA_2R_RDTIME_D: + printf("rdtime.d %s, %s\n", rd, rj); + return; + case LA_2R_CPUCFG: + printf("cpucfg %s, %s\n", rd, rj); + return; + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2R_ASRTLE_D: + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("asrtle.d %s, %s\n", rj, rk); + return; + } + case LA_2R_ASRTGT_D: + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("asrtgt.d %s, %s\n", rj, rk); + return; + } 
+ default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x1: + { + // LA_OP_ALSL_W + // LA_OP_ALSL_WU + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + if (0 == ((code >> 17) & 0x1)) + { + printf("alsl.w %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + else if (1 == ((code >> 17) & 0x1)) + { + printf("alsl.wu %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_OP_BYTEPICK_W: // 0x2 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf("bytepick.w %s, %s, %s, %d\n", rd, rj, rk, sa2); + return; + } + case LA_OP_BYTEPICK_D: // 0x3 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa3 = (code >> 15) & 0x7; + printf("bytepick.d %s, %s, %s, %d\n", rd, rj, rk, sa3); + return; + } + case 0x4: + case 0x5: + case 0x6: + case 0x7: + case 0x8: + case 0x9: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + + switch (inscode2) + { + case LA_3R_ADD_W: + printf("add.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ADD_D: + printf("add.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SUB_W: + printf("sub.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SUB_D: + printf("sub.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLT: + printf("slt %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLTU: + printf("sltu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MASKEQZ: + printf("maskeqz %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MASKNEZ: + printf("masknez %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_NOR: + printf("nor %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_AND: + printf("and %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_OR: + printf("or %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_XOR: + printf("xor %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ORN: + printf("orn %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ANDN: + printf("andn %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLL_W: + printf("sll.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRL_W: + printf("srl.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRA_W: + printf("sra.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLL_D: + printf("sll.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRL_D: + printf("srl.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRA_D: + printf("sra.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ROTR_W: + printf("rotr.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ROTR_D: + printf("rotr.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MUL_W: + printf("mul.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_W: + printf("mulh.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_WU: + printf("mulh.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MUL_D: + printf("mul.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_D: + printf("mulh.d %s, %s, %s\n", rd, rj, rk); + return; + case 
LA_3R_MULH_DU: + printf("mulh.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULW_D_W: + printf("mulw.d.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULW_D_WU: + printf("mulw.d.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_W: + printf("div.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_W: + printf("mod.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_WU: + printf("div.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_WU: + printf("mod.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_D: + printf("div.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_D: + printf("mod.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_DU: + printf("div.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_DU: + printf("mod.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_B_W: + printf("crc.w.b.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_H_W: + printf("crc.w.h.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_W_W: + printf("crc.w.w.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_D_W: + printf("crc.w.d.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_B_W: + printf("crcc.w.b.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_H_W: + printf("crcc.w.h.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_W_W: + printf("crcc.w.w.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_D_W: + printf("crcc.w.d.w %s, %s, %s\n", rd, rj, rk); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case 0xa: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + unsigned int codefield = code & 0x7fff; + switch (inscode2) + { + case LA_OP_BREAK: + printf("break 0x%x\n", codefield); + return; + case LA_OP_DBGCALL: + printf("dbgcall 0x%x\n", codefield); + return; + case LA_OP_SYSCALL: + printf("syscall 0x%x\n", codefield); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case LA_OP_ALSL_D: // 0xb + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf("alsl.d %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x1: + { + if (code & 0x200000) + { + // LA_OP_BSTRINS_W + // LA_OP_BSTRPICK_W + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbw = (code >> 10) & 0x1f; + unsigned int msbw = (code >> 16) & 0x1f; + if (!(code & 0x8000)) + { + printf("bstrins.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); + return; + } + else if (code & 0x8000) + { + printf("bstrpick.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + else + { + // bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x10: + { + // LA_OP_SLLI_W: + // LA_OP_SLLI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("slli.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("slli.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal 
instruction: %08X\n", code); + return; + } + return; + } + case 0x11: + { + // LA_OP_SRLI_W: + // LA_OP_SRLI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("srli.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("srli.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x12: + { + // LA_OP_SRAI_W: + // LA_OP_SRAI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("srai.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("srai.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x13: + { + // LA_OP_ROTRI_W: + // LA_OP_ROTRI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("rotri.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("rotri.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + return; + } + case LA_OP_BSTRINS_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf("bstrins.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); + return; + } + case LA_OP_BSTRPICK_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf("bstrpick.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); + return; + } + case 0x4: + { + // bits: 31-15,MSB17 + unsigned int inscode1 = (code >> 15) & 0x1ffff; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + + switch (inscode1) + { + case LA_3R_FADD_S: + printf("fadd.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FADD_D: + printf("fadd.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSUB_S: + printf("fsub.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSUB_D: + printf("fsub.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMUL_S: + printf("fmul.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMUL_D: + printf("fmul.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FDIV_S: + printf("fdiv.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FDIV_D: + printf("fdiv.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAX_S: + printf("fmax.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAX_D: + printf("fmax.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMIN_S: + printf("fmin.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMIN_D: + printf("fmin.d %s, 
%s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAXA_S: + printf("fmaxa.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAXA_D: + printf("fmaxa.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMINA_S: + printf("fmina.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMINA_D: + printf("fmina.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSCALEB_S: + printf("fscaleb.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSCALEB_D: + printf("fscaleb.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_S: + printf("fcopysign.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_D: + printf("fcopysign.d %s, %s, %s\n", fd, fj, fk); + return; + case 0x228: + case 0x229: + case 0x232: + case 0x234: + case 0x235: + case 0x236: + case 0x23a: + case 0x23c: + { + // bits:31-10,MSB22 + unsigned int inscode2 = (code >> 10) & 0x3fffff; + switch (inscode2) + { + case LA_2R_FABS_S: + printf("fabs.s %s, %s\n", fd, fj); + return; + case LA_2R_FABS_D: + printf("fabs.d %s, %s\n", fd, fj); + return; + case LA_2R_FNEG_S: + printf("fneg.s %s, %s\n", fd, fj); + return; + case LA_2R_FNEG_D: + printf("fneg.d %s, %s\n", fd, fj); + return; + case LA_2R_FLOGB_S: + printf("flogb.s %s, %s\n", fd, fj); + return; + case LA_2R_FLOGB_D: + printf("flogb.d %s, %s\n", fd, fj); + return; + case LA_2R_FCLASS_S: + printf("fclass.s %s, %s\n", fd, fj); + return; + case LA_2R_FCLASS_D: + printf("fclass.d %s, %s\n", fd, fj); + return; + case LA_2R_FSQRT_S: + printf("fsqrt.s %s, %s\n", fd, fj); + return; + case LA_2R_FSQRT_D: + printf("fsqrt.d %s, %s\n", fd, fj); + return; + case LA_2R_FRECIP_S: + printf("frecip.s %s, %s\n", fd, fj); + return; + case LA_2R_FRECIP_D: + printf("frecip.d %s, %s\n", fd, fj); + return; + case LA_2R_FRSQRT_S: + printf("frsqrt.s %s, %s\n", fd, fj); + return; + case LA_2R_FRSQRT_D: + printf("frsqrt.d %s, %s\n", fd, fj); + return; + case LA_2R_FMOV_S: + printf("fmov.s %s, %s\n", fd, fj); + return; + case LA_2R_FMOV_D: + printf("fmov.d %s, %s\n", fd, fj); + return; + case LA_2R_MOVGR2FR_W: + printf("movgr2fr.w %s, %s\n", fd, rj); + return; + case LA_2R_MOVGR2FR_D: + printf("movgr2fr.d %s, %s\n", fd, rj); + return; + case LA_2R_MOVGR2FRH_W: + printf("movgr2frh.w %s, %s\n", fd, rj); + return; + case LA_2R_MOVFR2GR_S: + printf("movfr2gr.s %s, %s\n", rd, fj); + return; + case LA_2R_MOVFR2GR_D: + printf("movfr2gr.d %s, %s\n", rd, fj); + return; + case LA_2R_MOVFRH2GR_S: + printf("movfrh2gr.s %s, %s\n", rd, fj); + return; + case LA_2R_MOVGR2FCSR: + NYI_LOONGARCH64("unused instr LA_2R_MOVGR2FCSR"); + return; + case LA_2R_MOVFCSR2GR: + NYI_LOONGARCH64("unused instr LA_2R_MOVFCSR2GR"); + return; + case LA_2R_MOVFR2CF: + { + const char* cd = CFregName[code & 0x7]; + printf("movfr2cf %s, %s\n", cd, fj); + return; + } + case LA_2R_MOVCF2FR: + { + const char* cj = CFregName[(code >> 5) & 0x7]; + printf("movcf2fr %s, %s\n", fd, cj); + return; + } + case LA_2R_MOVGR2CF: + { + const char* cd = CFregName[code & 0x7]; + printf("movgr2cf %s, %s\n", cd, rj); + return; + } + case LA_2R_MOVCF2GR: + { + const char* cj = CFregName[(code >> 5) & 0x7]; + printf("movcf2gr %s, %s\n", rd, cj); + return; + } + case LA_2R_FCVT_S_D: + printf("fcvt.s.d %s, %s\n", fd, fj); + return; + case LA_2R_FCVT_D_S: + printf("fcvt.d.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_W_S: + printf("ftintrm.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_W_D: + printf("ftintrm.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_L_S: + printf("ftintrm.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_L_D: 
+ printf("ftintrm.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_W_S: + printf("ftintrp.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_W_D: + printf("ftintrp.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_L_S: + printf("ftintrp.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_L_D: + printf("ftintrp.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_W_S: + printf("ftintrz.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_W_D: + printf("ftintrz.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_L_S: + printf("ftintrz.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_L_D: + printf("ftintrz.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_W_S: + printf("ftintrne.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_W_D: + printf("ftintrne.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_L_S: + printf("ftintrne.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_L_D: + printf("ftintrne.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_W_S: + printf("ftint.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_W_D: + printf("ftint.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_L_S: + printf("ftint.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_L_D: + printf("ftint.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_S_W: + printf("ffint.s.w %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_S_L: + printf("ffint.s.l %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_D_W: + printf("ffint.d.w %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_D_L: + printf("ffint.d.l %s, %s\n", fd, fj); + return; + case LA_2R_FRINT_S: + printf("frint.s %s, %s\n", fd, fj); + return; + case LA_2R_FRINT_D: + printf("frint.d %s, %s\n", fd, fj); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2RI12_SLTI: // 0x8 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("slti %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_SLTUI: // 0x9 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("sltui %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_ADDI_W: // 0xa + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("addi.w %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_ADDI_D: // 0xb + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("addi.d %s, %s, %ld\n", rd, rj, si12); + return; + } + case LA_2RI12_LU52I_D: // 0xc + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int si12 = (code >> 10) & 0xfff; + printf("lu52i.d %s, %s, 0x%x\n", rd, rj, si12); + return; + } + case LA_2RI12_ANDI: // 0xd + { + if (code == 0x03400000) + { + printf("nop\n"); + } + else + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("andi %s, %s, 0x%x\n", rd, rj, ui12); + } + return; + } + case LA_2RI12_ORI: // 0xe + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; 
+ unsigned int ui12 = ((code >> 10) & 0xfff); + printf("ori %s, %s, 0x%x\n", rd, rj, ui12); + return; + } + case LA_2RI12_XORI: // 0xf + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("xori %s, %s, 0x%x\n", rd, rj, ui12); + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +// Label_OPCODE_1: +// opcode = (code >> 24) & 0xff; +// //bits: 31-24,MSB8 + +Label_OPCODE_2: + opcode = (code >> 20) & 0xfff; + + // bits: 31-20,MSB12 + switch (opcode) + { + case LA_4R_FMADD_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMADD_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_3: + opcode = (code >> 20) & 0xfff; + + // bits: 31-20,MSB12 + switch (opcode) + { + case LA_OP_FCMP_cond_S: + { + // bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char* cd = CFregName[code & 0x7]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + switch 
(cond) + { + case 0x0: + printf("fcmp.caf.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x1: + printf("fcmp.saf.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x2: + printf("fcmp.clt.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x3: + printf("fcmp.slt.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x4: + printf("fcmp.ceq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x5: + printf("fcmp.seq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x6: + printf("fcmp.cle.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x7: + printf("fcmp.sle.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x8: + printf("fcmp.cun.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x9: + printf("fcmp.sun.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xA: + printf("fcmp.cult.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xB: + printf("fcmp.sult.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xC: + printf("fcmp.cueq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xD: + printf("fcmp.sueq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xE: + printf("fcmp.cule.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xF: + printf("fcmp.sule.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x10: + printf("fcmp.cne.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x11: + printf("fcmp.sne.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x14: + printf("fcmp.cor.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x15: + printf("fcmp.sor.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x18: + printf("fcmp.cune.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x19: + printf("fcmp.sune.s %s, %s, %s\n", cd, fj, fk); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case LA_OP_FCMP_cond_D: + { + // bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char* cd = CFregName[code & 0x7]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + switch (cond) + { + case 0x0: + printf("fcmp.caf.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x1: + printf("fcmp.saf.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x2: + printf("fcmp.clt.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x3: + printf("fcmp.slt.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x4: + printf("fcmp.ceq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x5: + printf("fcmp.seq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x6: + printf("fcmp.cle.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x7: + printf("fcmp.sle.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x8: + printf("fcmp.cun.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x9: + printf("fcmp.sun.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xA: + printf("fcmp.cult.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xB: + printf("fcmp.sult.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xC: + printf("fcmp.cueq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xD: + printf("fcmp.sueq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xE: + printf("fcmp.cule.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xF: + printf("fcmp.sule.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x10: + printf("fcmp.cne.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x11: + printf("fcmp.sne.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x14: + printf("fcmp.cor.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x15: + printf("fcmp.sor.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x18: + printf("fcmp.cune.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x19: + printf("fcmp.sune.d %s, %s, %s\n", cd, fj, fk); + return; + default: + printf("LOONGARCH 
illegal instruction: %08X\n", code); + return; + } + } + case LA_4R_FSEL: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* ca = CFregName[(code >> 15) & 0x7]; + printf("fsel %s, %s, %s, %s\n", fd, fj, fk, ca); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_E: + opcode = (code >> 15) & 0x1ffff; + + // bits: 31-15,MSB17 + switch (opcode) + { + case LA_3R_LDX_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_BU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.bu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_HU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.hu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_PRELDX: + NYI_LOONGARCH64("unused instr LA_3R_PRELDX"); + return; + case LA_3R_FLDX_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldx.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDX_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldx.d %s, %s, %s\n", fd, rj, rk); + return; + } + case 
LA_3R_FSTX_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstx.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTX_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstx.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_AMSWAP_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = 
RegNames[(code >> 10) & 0x1f]; + printf("ammin.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.w %s, %s, %s\n", rd, rj, 
rk); + return; + } + case LA_3R_AMMAX_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_OP_DBAR: + { + unsigned int hint = code & 0x7fff; + printf("dbar 0x%x\n", hint); + return; + } + case LA_OP_IBAR: + { + unsigned int hint = code & 0x7fff; + printf("ibar 0x%x\n", hint); + return; + } + case LA_3R_FLDGT_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldgt.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDGT_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldgt.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDLE_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldle.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDLE_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldle.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTGT_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstgt.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTGT_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstgt.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTLE_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstle.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTLE_D: + { + const char* fd = 
RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstle.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_LDGT_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.w %s, %s, %s\n", rd, rj, rk); + 
return; + } + case LA_3R_STLE_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.d %s, %s, %s\n", rd, rj, rk); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) + { + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + assert(sz == 0); + printf(" "); + } + } +} + +void emitter::emitDispIns( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ + // LA implements this similar by `emitter::emitDisInsName`. + // For LA maybe the `emitDispIns` is over complicate. + // The `emitter::emitDisInsName` is focused on the most important for debugging. + NYI_LOONGARCH64("LA not used the emitter::emitDispIns"); +} + +/***************************************************************************** + * + * Display a stack frame reference. + */ + +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ + printf("["); + + if (varx < 0) + printf("TEMP_%02u", -varx); + else + emitComp->gtDispLclVar(+varx, false); + + if (disp < 0) + printf("-0x%02x", -disp); + else if (disp > 0) + printf("+0x%02x", +disp); + + printf("]"); + + if (varx >= 0 && emitComp->opts.varNames) + { + LclVarDsc* varDsc; + const char* varName; + + assert((unsigned)varx < emitComp->lvaCount); + varDsc = emitComp->lvaTable + varx; + varName = emitComp->compLocalVarName(varx, offs); + + if (varName) + { + printf("'%s", varName); + + if (disp < 0) + printf("-%d", -disp); + else if (disp > 0) + printf("+%d", +disp); + + printf("'"); + } + } +} + +#endif // DEBUG + +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// Since LOONGARCH64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for LOONGARCH64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + emitAttr addType = varTypeIsGC(memBase) ? 
EA_BYREF : EA_PTRSIZE; + + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); + + if (offset != 0) + { + regNumber tmpReg = indir->GetSingleTempReg(); + + if (isValidSimm12(offset)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, + offset); // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, REG_R21); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + else // (offset == 0) + { + // Then load/store dataReg from/to [memBase + index] + switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) + { + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } + else + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } + else + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; + else + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); + } + + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_I(INS_slli_d, emitActualTypeSize(index->TypeGet()), REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); + } + } + } + else // no Index register + { + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. 
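+            // For example (sketch): a class/static variable address generally does not fit in a
+            // 12-bit displacement, so the full address is first materialized in the reserved temp
+            // register and the access then goes through it, conceptually:
+            //   <form &field in addrReg>
+            //   ld.d  dataReg, addrReg, 0    // the exact sequence is chosen by emitIns_R_C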
+ regNumber addrReg = indir->GetSingleTempReg(); + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } + } + else if (isValidSimm12(offset)) + { + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); + + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } +#endif // DEBUG + // Then load/store dataReg from/to [addrReg] + emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); + } +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. + +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + NYI_LOONGARCH64("emitInsBinary-----unused"); + return REG_R0; +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. 
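+    // For example (sketch): GT_ADD(x, 8) is emitted as addi.d/addi.w with imm = 8; since ADD is
+    // commutative, GT_ADD(8, x) is accepted as well. For GT_SUB only src2 may be a contained
+    // constant, and it is folded below by negating the immediate and switching to addi.d/addi.w.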
+ GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + bool needCheckOv = dst->gtOverflowEx(); + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + if (needCheckOv) + { + if (ins == INS_add_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_add_w) // || ins == INS_add + { + assert(attr == EA_4BYTE); + } + else if (ins == INS_addi_d) + { + assert(intConst != nullptr); + } + else if (ins == INS_addi_w) + { + assert(intConst != nullptr); + } + else if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + } + else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) + { + assert(attr == EA_8BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || + (ins == INS_mulw_d_wu)) + { + assert(attr == EA_4BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. 
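+            // (sketch) The multiply overflow check is therefore done on registers below: the high
+            // half of the product is recomputed into REG_R21 with mulh.{w,wu,d,du}; an unsigned
+            // multiply overflows when that high half is non-zero, a signed one when it differs
+            // from the sign bits of the low result (srai.w/srai.d into REG_RA, then bne against REG_R21).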
+ assert(intConst == nullptr); + } + else + { +#ifdef DEBUG + printf("LOONGARCH64-Invalid ins for overflow check: %s\n", codeGen->genInsName(ins)); +#endif + assert(!"Invalid ins for overflow check"); + } + } + + if (intConst != nullptr) + { + ssize_t imm = intConst->IconValue(); + if (ins == INS_andi || ins == INS_ori || ins == INS_xori) + { + assert(isValidUimm12(imm)); + } + else + { + assert(isValidSimm12(imm)); + } + + if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + assert(imm != -2048); + ins = INS_addi_d; + imm = -imm; + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + assert(imm != -2048); + ins = INS_addi_w; + imm = -imm; + } + + assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); + + if (needCheckOv) + { + emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); + } + + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), imm); + + if (needCheckOv) + { + if (ins == INS_addi_d || ins == INS_addi_w) + { + // A = B + C + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, dst->GetRegNum(), nullptr, REG_R21); + } + else + { + if (imm > 0) + { + // B > 0 and C > 0, if A < B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R0, REG_R21); + emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_R21, dst->GetRegNum(), imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + + codeGen->genDefineTempLabel(tmpLabel); + } + else if (imm < 0) + { + // B < 0 and C < 0, if A > B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R21, REG_R0); + emitIns_R_R_I(INS_addi_d, attr, REG_R21, REG_R0, imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, REG_R21, nullptr, dst->GetRegNum()); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + else if (varTypeIsFloating(dst)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else if (dst->OperGet() == GT_MUL) + { + if (!needCheckOv && !(dst->gtFlags & GTF_UNSIGNED)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else + { + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + instruction ins2; + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + ins2 = INS_mulh_wu; + else + ins2 = INS_mulh_du; + } + else + { + if (attr == EA_8BYTE) + ins2 = INS_mulh_d; + else + ins2 = INS_mulh_w; + } + + emitIns_R_R_R(ins2, attr, REG_R21, src1->GetRegNum(), src2->GetRegNum()); + } + + // n * n bytes will store n bytes result + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); + } + + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + else + { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); + size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 
63 : 31; + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), + imm); + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); + } + } + } + } + else if (dst->OperIs(GT_AND, GT_AND_NOT, GT_OR, GT_XOR)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + // TODO-LOONGARCH64-CQ: here sign-extend dst when deal with 32bit data is too conservative. + if (EA_SIZE(attr) == EA_4BYTE) + emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); + } + else + { + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); + regNumber saveOperReg1 = REG_NA; + regNumber saveOperReg2 = REG_NA; + + if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) + { + if (src1->gtType == TYP_INT) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); + regOp1 = REG_RA; // dst->ExtractTempReg(); + } + if (src2->gtType == TYP_INT) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); + regOp2 = REG_R21; // dst->ExtractTempReg(); + } + } + if (needCheckOv) + { + assert(!varTypeIsFloating(dst)); + + assert(REG_R21 != dst->GetRegNum()); + assert(REG_RA != dst->GetRegNum()); + + if (dst->GetRegNum() == regOp1) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + saveOperReg1 = REG_R21; + saveOperReg2 = regOp2; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0); + } + else if (dst->GetRegNum() == regOp2) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + saveOperReg1 = regOp1; + saveOperReg2 = REG_R21; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp2, REG_R0); + } + else + { + saveOperReg1 = regOp1; + saveOperReg2 = regOp2; + } + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), regOp1, regOp2); + + if (needCheckOv) + { + if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) + { + ssize_t imm; + regNumber tempReg1; + regNumber tempReg2; + // ADD : A = B + C + // SUB : C = A - B + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + // if A < B, goto overflow + if (dst->OperGet() == GT_ADD) + { + tempReg1 = dst->GetRegNum(); + tempReg2 = saveOperReg1; + } + else + { + tempReg1 = saveOperReg1; + tempReg2 = saveOperReg2; + } + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2); + } + else + { + tempReg1 = REG_RA; + tempReg2 = dst->GetSingleTempReg(); + assert(tempReg1 != tempReg2); + assert(tempReg1 != saveOperReg1); + assert(tempReg2 != saveOperReg2); + + ssize_t ui6 = (attr == EA_4BYTE) ? 
31 : 63; + if (dst->OperGet() == GT_ADD) + emitIns_R_R_I(INS_srli_d, attr, tempReg1, saveOperReg1, ui6); + else + emitIns_R_R_I(INS_srli_d, attr, tempReg1, dst->GetRegNum(), ui6); + emitIns_R_R_I(INS_srli_d, attr, tempReg2, saveOperReg2, ui6); + + emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2); + if (attr == EA_4BYTE) + { + imm = 1; + emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm); + emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); + } + // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); + + emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0); + + emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); + + // B > 0 and C > 0, if A < B, goto overflow + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, + dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + + codeGen->genDefineTempLabel(tmpLabel2); + + codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW); + + codeGen->genDefineTempLabel(tmpLabel3); + + // B < 0 and C < 0, if A > B, goto overflow + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, + dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + else + { +#ifdef DEBUG + printf("---------[LOONGARCH64]-NOTE: UnsignedOverflow instruction %d\n", ins); +#endif + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + + return dst->GetRegNum(); +} + +unsigned emitter::get_curTotalCodeSize() +{ + return emitTotalCodeSize; +} + +#if defined(DEBUG) || defined(LATE_DISASM) + +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are NOT accurate and just a function feature. +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + + // TODO-LoongArch64: support this function. + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + + return result; +} + +#endif // defined(DEBUG) || defined(LATE_DISASM) + +#ifdef DEBUG +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// TODO-LoongArch64: supporting SIMD. +// Arguments: +// reg - A general-purpose register orfloating-point register. +// size - unused parameter. +// varName - unused parameter. +// +// Return value: +// A string that represents a general-purpose register name or floating-point scalar register name. 
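+// For example, the same string is returned regardless of 'size': unlike x86 (eax vs. rax),
+// LOONGARCH64 register names do not vary with operand width, so 'size' and 'varName' are
+// ignored and the name comes straight from the RegNames table.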
+// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) +{ + assert(reg < REG_COUNT); + + const char* rn = nullptr; + + rn = RegNames[reg]; + assert(rn != nullptr); + + return rn; +} +#endif + +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a give instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_fmov_s: + case INS_fmov_d: + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movfr2gr_s: + case INS_movfr2gr_d: + { + return true; + } + + default: + { + return false; + } + } +} +#endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h new file mode 100644 index 0000000000000..d7e7cc5450acb --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.h @@ -0,0 +1,241 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if defined(TARGET_LOONGARCH64) + +// The LOONGARCH64 instructions are all 32 bits in size. +// we use an unsigned int to hold the encoded instructions. +// This typedef defines the type that we use to hold encoded instructions. +// +typedef unsigned int code_t; + +/************************************************************************/ +/* Routines that compute the size of / encode instructions */ +/************************************************************************/ + +struct CnsVal +{ + ssize_t cnsVal; + bool cnsReloc; +}; + +#ifdef DEBUG + +/************************************************************************/ +/* Debug-only routines to display instructions */ +/************************************************************************/ + +const char* emitFPregName(unsigned reg, bool varName = true); +const char* emitVectorRegName(regNumber reg); + +void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id); +#endif // DEBUG + +void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); +void emitIns_I_la(emitAttr attr, regNumber reg, ssize_t imm); + +/************************************************************************/ +/* Private members that deal with target-dependent instr. 
descriptors */ +/************************************************************************/ + +private: +instrDesc* emitNewInstrCallDir(int argCnt, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +/************************************************************************/ +/* Private helpers for instruction output */ +/************************************************************************/ + +private: +bool emitInsIsLoad(instruction ins); +bool emitInsIsStore(instruction ins); +bool emitInsIsLoadOrStore(instruction ins); + +emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); + +// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] +void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + +// Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer +unsigned emitOutput_Instr(BYTE* dst, code_t code); + +// Method to do check if mov is redundant with respect to the last instruction. +// If yes, the caller of this method can choose to omit current mov instruction. +static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. + +/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -(((int)1) << 11) <= value && value < (((int)1) << 11); +}; + +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. +static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +} + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +} + +// Returns true if 'value' is a legal signed immediate 20 bit encoding. +static bool isValidSimm20(ssize_t value) +{ + return -(((int)1) << 19) <= value && value < (((int)1) << 19); +}; + +// Returns true if 'value' is a legal signed immediate 38 bit encoding. +static bool isValidSimm38(ssize_t value) +{ + return -(((ssize_t)1) << 37) <= value && value < (((ssize_t)1) << 37); +}; + +// Returns the number of bits used by the given 'size'. 
+inline static unsigned getBitWidth(emitAttr size) +{ + assert(size <= EA_8BYTE); + return (unsigned)size * BITS_PER_BYTE; +} + +inline static bool isGeneralRegister(regNumber reg) +{ + return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +inline static bool isGeneralRegisterOrR0(regNumber reg) +{ + return (reg >= REG_FIRST) && (reg <= REG_INT_LAST); +} // Includes REG_R0 + +inline static bool isFloatReg(regNumber reg) +{ + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + +/************************************************************************/ +/* The public entry points to output instructions */ +/************************************************************************/ + +public: +void emitIns(instruction ins); + +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); + +void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); +void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); + +void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) +{ + emitIns_R_R(ins, attr, reg1, reg2); +} + +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +// Checks for a large immediate that needs a second instruction +void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm); + +void emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + +void emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + +void emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +void emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); + +enum EmitCallType +{ + + // I have included here, but commented out, all the values used by the x86 emitter. + // However, LOONGARCH has a much reduced instruction set, and so the LOONGARCH emitter only + // supports a subset of the x86 variants. By leaving them commented out, it becomes + // a compile time error if code tries to use them (and hopefully see this comment + // and know why they are unavailible on LOONGARCH), while making it easier to stay + // in-sync with x86 and possibly add them back in if needed. 
+ + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method + // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method + // EC_FUNC_ADDR, // Direct call to an absolute address + + // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) + EC_INDIR_R, // Indirect call via register + // EC_INDIR_SR, // Indirect call via stack-reference (local var) + // EC_INDIR_C, // Indirect call via static class var + // EC_INDIR_ARD, // Indirect call via an addressing mode + + EC_COUNT +}; + +void emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di, + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); + +unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); + +unsigned get_curTotalCodeSize(); // bytes of code + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 4982104acc749..02ab3bb879d6f 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -139,7 +139,7 @@ static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADE /* Interface for generating unwind information */ /************************************************************************/ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL); @@ -151,7 +151,7 @@ void emitSplit(emitLocation* startLoc, void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) #if defined(TARGET_ARM) diff --git a/src/coreclr/jit/error.h b/src/coreclr/jit/error.h index 450c24de3a456..618e5b3a7ee2d 100644 --- a/src/coreclr/jit/error.h +++ b/src/coreclr/jit/error.h @@ -174,6 +174,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_X86) @@ -181,6 +182,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) NYIRAW("NYI_X86: " msg) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM) @@ -188,6 +190,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM64) @@ -195,10 +198,18 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) +#define NYI_LOONGARCH64(msg) do { } while (0) + +#elif defined(TARGET_LOONGARCH64) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) 
NYIRAW("NYI_LOONGARCH64: " msg) #else -#error "Unknown platform, not x86, ARM, or AMD64?" +#error "Unknown platform, not x86, ARM, LOONGARCH64 or AMD64?" #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d111ae7ed3f4f..319100e943b4a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3277,6 +3277,27 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostSz += 4; } } +#elif defined(TARGET_LOONGARCH64) + if (base) + { + *pCostEx += base->GetCostEx(); + *pCostSz += base->GetCostSz(); + } + + if (idx) + { + *pCostEx += idx->GetCostEx(); + *pCostSz += idx->GetCostSz(); + } + if (cns != 0) + { + if (!emitter::isValidSimm12(cns)) + { + // TODO-LoongArch64-CQ: tune for LoongArch64. + *pCostEx += 1; + *pCostSz += 4; + } + } #else #error "Unknown TARGET" #endif @@ -3691,13 +3712,24 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } goto COMMON_CNS; +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + case GT_CNS_STR: + costEx = IND_COST_EX + 2; + costSz = 4; + goto COMMON_CNS; + + case GT_CNS_LNG: + case GT_CNS_INT: + costEx = 1; + costSz = 4; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: #error "Unknown TARGET" #endif - COMMON_CNS: /* Note that some code below depends on constants always getting @@ -3753,6 +3785,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX; costSz = 4; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + costEx = 2; + costSz = 8; #else #error "Unknown TARGET" #endif @@ -3926,6 +3962,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX * 2; costSz = 6; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + costEx = 1; + costSz = 4; #else #error "Unknown TARGET" #endif @@ -6965,7 +7005,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) { // All checks pass we can perform the division operation using a reciprocal multiply. @@ -13823,7 +13863,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); - // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); @@ -21851,6 +21890,44 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, m_regType[i] = comp->getJitGCType(gcPtrs[i]); } +#elif defined(TARGET_LOONGARCH64) + assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); + + uint32_t floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); + + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; + } + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + } + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + else + { + for (unsigned i = 0; i < 2; ++i) + { + m_regType[i] = comp->getJitGCType(gcPtrs[i]); + } + } + #elif defined(TARGET_X86) // an 8-byte struct returned using two registers @@ -22041,6 +22118,27 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3 } +#elif defined(TARGET_LOONGARCH64) + var_types regType = GetReturnRegType(idx); + if (idx == 0) + { + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // A0 or F0 + } + else + { + noway_assert(idx == 1); // Up to 2 return registers for two-float-field structs + + // If the first return register is from the same register file, return the one next to it. + if (varTypeIsIntegralOrI(regType)) + { + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 + } + else // varTypeUsesFloatReg(regType) + { + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } + } + #endif // TARGET_XXX assert(resultReg != REG_NA); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 0c2367dbb2012..5552e6ecc77a6 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4439,6 +4439,10 @@ struct GenTreeCall final : public GenTree bool HasMultiRegRetVal() const { #ifdef FEATURE_MULTIREG_RET +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else + #if defined(TARGET_X86) || defined(TARGET_ARM) if (varTypeIsLong(gtType)) { @@ -4452,6 +4456,8 @@ struct GenTreeCall final : public GenTree } // Now it is a struct that is returned in registers. 
return GetReturnTypeDesc()->IsMultiRegRetType(); +#endif + #else // !FEATURE_MULTIREG_RET return false; #endif // !FEATURE_MULTIREG_RET diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 54cedb357958b..4ab1844b66d4d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8541,7 +8541,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -8571,7 +8571,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARMARCH +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 return false; } @@ -10380,7 +10380,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#elif FEATURE_MULTIREG_RET && defined(TARGET_ARM64) +#elif FEATURE_MULTIREG_RET && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // Is method returning a multi-reg struct? if (IsMultiRegReturnedType(retClsHnd, unmgdCallConv)) @@ -10419,7 +10419,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#endif // FEATURE_MULTIREG_RET && TARGET_ARM64 +#endif // FEATURE_MULTIREG_RET && (TARGET_ARM64 || TARGET_LOONGARCH64) if (!op->IsCall() || !op->AsCall()->TreatAsHasRetBufArg(this)) { @@ -14135,6 +14135,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; + impBashVarAddrsToI(op1); // Casts from floating point types must not have GTF_UNSIGNED set. @@ -17443,7 +17444,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) } } else -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ReturnTypeDesc retTypeDesc; retTypeDesc.InitializeStructReturnType(this, retClsHnd, info.compCallConv); unsigned retRegCount = retTypeDesc.GetReturnRegCount(); @@ -20811,6 +20812,9 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) default: return false; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64: add some instrinsics. + return false; #else // TODO: This portion of logic is not implemented for other arch. // The reason for returning true is that on all other arch the only intrinsic diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 34d55023a3ce5..67ae437f03b75 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -66,6 +66,10 @@ const char* CodeGen::genInsName(instruction ins) #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm, #include "instrs.h" +#elif defined(TARGET_LOONGARCH64) + #define INST(id, nm, ldst, e1) nm, + #include "instrs.h" + #else #error "Unknown TARGET" #endif @@ -420,7 +424,12 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s size = emitActualTypeSize(type); } +#ifdef TARGET_LOONGARCH64 + // inst_RV is not used for LoongArch64, so there is no need to define `emitIns_R`. 
+ NYI_LOONGARCH64("inst_RV-----unused on LOONGARCH64----"); +#else GetEmitter()->emitIns_R(ins, size, reg); +#endif } /***************************************************************************** @@ -434,6 +443,31 @@ void CodeGen::inst_Mov(var_types dstType, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */) { +#ifdef TARGET_LOONGARCH64 + if (isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) + { + if (dstType == TYP_FLOAT) + { + dstType = TYP_INT; + } + else if (dstType == TYP_DOUBLE) + { + dstType = TYP_LONG; + } + else if (dstType == TYP_INT) + { + dstType = TYP_FLOAT; + } + else if (dstType == TYP_LONG) + { + dstType = TYP_DOUBLE; + } + else + { + NYI_LOONGARCH64("CodeGen::inst_Mov dstType"); + } + } +#endif instruction ins = ins_Copy(srcReg, dstType); if (size == EA_UNKNOWN) @@ -523,7 +557,7 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else NYI("inst_RV_RV_RV"); @@ -599,6 +633,8 @@ void CodeGen::inst_RV_IV( assert(ins != INS_tst); assert(ins != INS_mov); GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); #else // !TARGET_ARM #ifdef TARGET_AMD64 // Instead of an 8-byte immediate load, a 4-byte immediate will do fine @@ -1221,6 +1257,8 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { + NYI_LOONGARCH64("ins_Move_Extend"); + instruction ins = INS_invalid; if (varTypeIsSIMD(srcType)) @@ -1426,6 +1464,19 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_ldr; #elif defined(TARGET_ARM) return INS_vldr; +#elif defined(TARGET_LOONGARCH64) + if (srcType == TYP_DOUBLE) + { + return INS_fld_d; + } + else if (srcType == TYP_FLOAT) + { + return INS_fld_s; + } + else + { + assert(!"unhandled floating type"); + } #else assert(!varTypeIsFloating(srcType)); #endif @@ -1464,6 +1515,29 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* else ins = INS_ldrsh; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_bu; + else + ins = INS_ld_b; + } + else if (varTypeIsShort(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_hu; + else + ins = INS_ld_h; + } + else if (TYP_INT == srcType) + { + ins = INS_ld_w; + } + else + { + ins = INS_ld_d; // default ld_d. + } #else NYI("ins_Load"); #endif @@ -1515,6 +1589,15 @@ instruction CodeGen::ins_Copy(var_types dstType) { return INS_mov; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(dstType)) + { + return dstType == TYP_FLOAT ? INS_fmov_s : INS_fmov_d; + } + else + { + return INS_mov; + } #else // TARGET_* #error "Unknown TARGET_" #endif @@ -1566,6 +1649,19 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) assert(dstType == TYP_INT); return INS_vmov_f2i; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: supporting SIMD. + assert(!varTypeIsSIMD(dstType)); + if (dstIsFloatReg) + { + assert(!genIsValidFloatReg(srcReg)); + return dstType == TYP_FLOAT ? INS_movgr2fr_w : INS_movgr2fr_d; + } + else + { + assert(genIsValidFloatReg(srcReg)); + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? 
INS_movfr2gr_s : INS_movfr2gr_d; + } #else // TARGET* #error "Unknown TARGET" #endif @@ -1578,6 +1674,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) * Parameters * dstType - destination type * aligned - whether destination is properly aligned if dstType is a SIMD type + * - for LoongArch64 aligned is used for store-index. */ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/) { @@ -1632,6 +1729,19 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { return INS_vstr; } +#elif defined(TARGET_LOONGARCH64) + assert(!varTypeIsSIMD(dstType)); + if (varTypeIsFloating(dstType)) + { + if (dstType == TYP_DOUBLE) + { + return aligned ? INS_fstx_d : INS_fst_d; + } + else if (dstType == TYP_FLOAT) + { + return aligned ? INS_fstx_s : INS_fst_s; + } + } #else assert(!varTypeIsSIMD(dstType)); assert(!varTypeIsFloating(dstType)); @@ -1646,6 +1756,15 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strb; else if (varTypeIsShort(dstType)) ins = INS_strh; +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(dstType)) + ins = aligned ? INS_stx_b : INS_st_b; + else if (varTypeIsShort(dstType)) + ins = aligned ? INS_stx_h : INS_st_h; + else if (TYP_INT == dstType) + ins = aligned ? INS_stx_w : INS_st_w; + else + ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); #endif @@ -1923,6 +2042,8 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla GetEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags)); #elif defined(TARGET_ARM64) GetEmitter()->emitIns_Mov(INS_mov, size, reg, REG_ZR, /* canSkip */ true); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(INS_ori, size, reg, REG_R0, 0); #else #error "Unknown TARGET" #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index d694df71be479..a01492d08b8a9 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -6,7 +6,11 @@ #define _INSTR_H_ /*****************************************************************************/ +#ifdef TARGET_LOONGARCH64 +#define BAD_CODE 0XFFFFFFFF +#else #define BAD_CODE 0x0BADC0DE // better not match a real encoding! +#endif /*****************************************************************************/ @@ -47,6 +51,11 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals +#elif defined(TARGET_LOONGARCH64) + #define INST(id, nm, ldst, e1) INS_##id, + #include "instrs.h" + + INS_lea, // Not a real instruction. It is used for load the address of stack locals #else #error Unsupported target architecture #endif @@ -140,7 +149,7 @@ enum insFlags : uint32_t INS_FLAGS_DONT_CARE = 0x00, }; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // TODO-Cleanup: Move 'insFlags' under TARGET_ARM enum insFlags: unsigned { @@ -292,6 +301,33 @@ enum insBarrier : unsigned INS_BARRIER_ST = 14, INS_BARRIER_SY = 15, }; +#elif defined(TARGET_LOONGARCH64) +enum insOpts : unsigned +{ + INS_OPTS_NONE, + + INS_OPTS_RC, // see ::emitIns_R_C(). + INS_OPTS_RL, // see ::emitIns_R_L(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_J, // see ::emitIns_J(). + INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitIns_I_la(). + INS_OPTS_C, // see ::emitIns_Call(). + INS_OPTS_RELOC, // see ::emitIns_R_AI(). 
+}; + +enum insBarrier : unsigned +{ + // TODO-LOONGARCH64-CQ: All of these are the same value right now. + // The distinct encodings are reserved for future extension; + // LoongArch64 doesn't support them right now. + INS_BARRIER_FULL = 0, + INS_BARRIER_WMB = INS_BARRIER_FULL,//4, + INS_BARRIER_MB = INS_BARRIER_FULL,//16, + INS_BARRIER_ACQ = INS_BARRIER_FULL,//17, + INS_BARRIER_REL = INS_BARRIER_FULL,//18, + INS_BARRIER_RMB = INS_BARRIER_FULL,//19, +}; #endif #undef EA_UNKNOWN diff --git a/src/coreclr/jit/instrs.h b/src/coreclr/jit/instrs.h index b543f781645f5..aa16547f44be7 100644 --- a/src/coreclr/jit/instrs.h +++ b/src/coreclr/jit/instrs.h @@ -7,6 +7,8 @@ #include "instrsarm.h" #elif defined(TARGET_ARM64) #include "instrsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "instrsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h new file mode 100644 index 0000000000000..ada87672e397a --- /dev/null +++ b/src/coreclr/jit/instrsloongarch64.h @@ -0,0 +1,488 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/***************************************************************************** + * LoongArch64 instructions for JIT compiler + * + * id -- the enum name for the instruction + * nm -- textual name (for assembly display) + * ld/st/cmp -- load/store/compare instruction + * encode -- encoding 1 + * +******************************************************************************/ + +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifndef INST +#error INST must be defined before including this file. +#endif + +/*****************************************************************************/ +/* The following is LOONGARCH64-specific */ +/*****************************************************************************/ + +// If you're adding a new instruction: +// You need not only to fill in one of these macros describing the instruction, but also: +// * If the instruction writes to more than one destination register, update the function +// emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. + +// clang-format off +INST(invalid, "INVALID", 0, BAD_CODE) +INST(nop , "nop", 0, 0x03400000) + + // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. +INST(bceqz, "bceqz", 0, 0x48000000) +INST(bcnez, "bcnez", 0, 0x48000100) + +INST(beq, "beq", 0, 0x58000000) +INST(bne, "bne", 0, 0x5c000000) + +INST(blt, "blt", 0, 0x60000000) +INST(bge, "bge", 0, 0x64000000) +INST(bltu, "bltu", 0, 0x68000000) +INST(bgeu, "bgeu", 0, 0x6c000000) + +////R_I. +INST(beqz, "beqz", 0, 0x40000000) +INST(bnez, "bnez", 0, 0x44000000) + +////I. +INST(b, "b", 0, 0x50000000) +INST(bl, "bl", 0, 0x54000000) + +/////////////////////////////////////////////////////////////////////////////////////////// +////NOTE: Begin +//// the following instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +// enum name FP LD/ST FMT ENCODE +// +////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). +/////////////////////////////////////////////////////////////////////////////////////////// +// mov rd,rj +// In fact, mov is an alias instruction, "ori rd,rj,0" +INST(mov, "mov", 0, 0x03800000) + //dneg is an alias instruction. + //sub_d rd, zero, rk +INST(dneg, "dneg", 0, 0x00118000) + //neg is an alias instruction.
+ //sub_w rd, zero, rk +INST(neg, "neg", 0, 0x00110000) + //not is a alias instruction. + //nor rd, rj, zero +INST(not, "not", 0, 0x00140000) + +// enum:id name FP LD/ST Formate ENCODE +////R_R_R. +INST(add_w, "add.w", 0, 0x00100000) +INST(add_d, "add.d", 0, 0x00108000) +INST(sub_w, "sub.w", 0, 0x00110000) +INST(sub_d, "sub.d", 0, 0x00118000) + +INST(and, "and", 0, 0x00148000) +INST(or, "or", 0, 0x00150000) +INST(nor, "nor", 0, 0x00140000) +INST(xor, "xor", 0, 0x00158000) +INST(andn, "andn", 0, 0x00168000) +INST(orn, "orn", 0, 0x00160000) + +INST(mul_w, "mul.w", 0, 0x001c0000) +INST(mul_d, "mul.d", 0, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0x001f8000) +INST(div_w, "div.w", 0, 0x00200000) +INST(div_wu, "div.wu", 0, 0x00210000) +INST(div_d, "div.d", 0, 0x00220000) +INST(div_du, "div.du", 0, 0x00230000) +INST(mod_w, "mod.w", 0, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0x00218000) +INST(mod_d, "mod.d", 0, 0x00228000) +INST(mod_du, "mod.du", 0, 0x00238000) + +INST(sll_w, "sll.w", 0, 0x00170000) +INST(srl_w, "srl.w", 0, 0x00178000) +INST(sra_w, "sra.w", 0, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0x001b0000) +INST(sll_d, "sll.d", 0, 0x00188000) +INST(srl_d, "srl.d", 0, 0x00190000) +INST(sra_d, "sra.d", 0, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0x00130000) +INST(masknez, "masknez", 0, 0x00138000) + +INST(slt, "slt", 0, 0x00120000) +INST(sltu, "sltu", 0, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0x386a8000) +INST(amand_w, "amand.w", 0, 0x38620000) +INST(amand_d, "amand.d", 0, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0x386b8000) +INST(amor_w, "amor.w", 0, 0x38630000) +INST(amor_d, "amor.d", 0, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0x00270000) 
+INST(crcc_w_d_w, "crcc.w.d.w", 0, 0x00278000) + +////R_R_R_I. +INST(alsl_w, "alsl.w", 0, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0x002c0000) + +INST(bytepick_w, "bytepick.w", 0, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0x000c0000) + +INST(fsel, "fsel", 0, 0x0d000000) + +////R_I. +INST(lu12i_w, "lu12i.w", 0, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0x16000000) + +INST(pcaddi, "pcaddi", 0, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0x1e000000) + +////R_R. +INST(ext_w_b, "ext.w.b", 0, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0x00005800) +INST(clo_w, "clo.w", 0, 0x00001000) +INST(clz_w, "clz.w", 0, 0x00001400) +INST(cto_w, "cto.w", 0, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0x00001c00) +INST(clo_d, "clo.d", 0, 0x00002000) +INST(clz_d, "clz.d", 0, 0x00002400) +INST(cto_d, "cto.d", 0, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0x00003800) +INST(revb_d, "revb.d", 0, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0x00004000) +INST(revh_d, "revh.d", 0, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0x00006c00) + +////R_R_I_I. +INST(bstrins_w, "bstrins.w", 0, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0x00c00000) + +////Load. +INST(ld_b, "ld.b", LD, 0x28000000) +INST(ld_h, "ld.h", LD, 0x28400000) +INST(ld_w, "ld.w", LD, 0x28800000) +INST(ld_d, "ld.d", LD, 0x28c00000) +INST(ld_bu, "ld.bu", LD, 0x2a000000) +INST(ld_hu, "ld.hu", LD, 0x2a400000) +INST(ld_wu, "ld.wu", LD, 0x2a800000) + +INST(ldptr_w, "ldptr.w", LD, 0x24000000) +INST(ldptr_d, "ldptr.d", LD, 0x26000000) +INST(ll_w, "ll.w", 0, 0x20000000) +INST(ll_d, "ll.d", 0, 0x22000000) + +INST(ldx_b, "ldx.b", LD, 0x38000000) +INST(ldx_h, "ldx.h", LD, 0x38040000) +INST(ldx_w, "ldx.w", LD, 0x38080000) +INST(ldx_d, "ldx.d", LD, 0x380c0000) +INST(ldx_bu, "ldx.bu", LD, 0x38200000) +INST(ldx_hu, "ldx.hu", LD, 0x38240000) +INST(ldx_wu, "ldx.wu", LD, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0x387b8000) + +////R_R_I. 
+INST(addi_w, "addi.w", 0, 0x02800000) +INST(addi_d, "addi.d", 0, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0x03000000) +INST(slti, "slti", 0, 0x02000000) + +INST(sltui, "sltui", 0, 0x02400000) +INST(andi, "andi", 0, 0x03400000) +INST(ori, "ori", 0, 0x03800000) +INST(xori, "xori", 0, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0x00408000) +INST(srli_w, "srli.w", 0, 0x00448000) +INST(srai_w, "srai.w", 0, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0x004c8000) +INST(slli_d, "slli.d", 0, 0x00410000) +INST(srli_d, "srli.d", 0, 0x00450000) +INST(srai_d, "srai.d", 0, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0x10000000) + +INST(jirl, "jirl", 0, 0x4c000000) +//////////////////////////////////////////////////////////////////////////////////////////// +////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). +// +////NOTE: End +//// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////////////////////////////////////////////////// + +////Store. +INST(st_b, "st.b", ST, 0x29000000) +INST(st_h, "st.h", ST, 0x29400000) +INST(st_w, "st.w", ST, 0x29800000) +INST(st_d, "st.d", ST, 0x29c00000) + +INST(stptr_w, "stptr.w", ST, 0x25000000) +INST(stptr_d, "stptr.d", ST, 0x27000000) +INST(sc_w, "sc.w", 0, 0x21000000) +INST(sc_d, "sc.d", 0, 0x23000000) + +INST(stx_b, "stx.b", ST, 0x38100000) +INST(stx_h, "stx.h", ST, 0x38140000) +INST(stx_w, "stx.w", ST, 0x38180000) +INST(stx_d, "stx.d", ST, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0x387d8000) +INST(stle_b, "stle.b", 0, 0x387e0000) +INST(stle_h, "stle.h", 0, 0x387e8000) +INST(stle_w, "stle.w", 0, 0x387f0000) +INST(stle_d, "stle.d", 0, 0x387f8000) + +INST(dbar, "dbar", 0, 0x38720000) +INST(ibar, "ibar", 0, 0x38728000) + +INST(syscall, "syscall", 0, 0x002b0000) +INST(break, "break", 0, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0x00018000) + +INST(preld, "preld", LD, 0x2ac00000) +INST(preldx, "preldx", LD, 0x382c0000) + +////Float instructions. +////R_R_R. +INST(fadd_s, "fadd.s", 0, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0x01130000) + +INST(fldx_s, "fldx.s", LD, 0x38300000) +INST(fldx_d, "fldx.d", LD, 0x38340000) +INST(fstx_s, "fstx.s", ST, 0x38380000) +INST(fstx_d, "fstx.d", ST, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0x38778000) + +////R_R_R_R. 
+INST(fmadd_s, "fmadd.s", 0, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0x08e00000) + +////R_R. +INST(fabs_s, "fabs.s", 0, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0x011ae800) +INST(frint_s, "frint.s", 0, 0x011e4400) +INST(frint_d, "frint.d", 0, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00) + +//// +INST(movgr2fcsr, "movgr2fcsr", 0, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0x0114dc00) + +////R_R_I. 
+INST(fcmp_caf_s, "fcmp.caf.s", 0, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0x0c1c8000) + +////R_R_I. 
+INST(fld_s, "fld.s", LD, 0x2b000000) +INST(fld_d, "fld.d", LD, 0x2b800000) +INST(fst_s, "fst.s", ST, 0x2b400000) +INST(fst_d, "fst.d", ST, 0x2bc00000) + +// clang-format on +/*****************************************************************************/ +#undef INST +/*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 1a1af89490d62..46945ed7eae7f 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -42,6 +42,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_X86 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_X86 and HOST_LOONGARCH64 +#endif #elif defined(HOST_AMD64) #if defined(HOST_X86) #error Cannot define both HOST_AMD64 and HOST_X86 @@ -52,6 +55,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_AMD64 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_AMD64 and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM) #if defined(HOST_X86) #error Cannot define both HOST_ARM and HOST_X86 @@ -62,6 +68,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_ARM and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM64) #if defined(HOST_X86) #error Cannot define both HOST_ARM64 and HOST_X86 @@ -72,6 +81,22 @@ #if defined(HOST_ARM) #error Cannot define both HOST_ARM64 and HOST_ARM #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM64 and HOST_LOONGARCH64 +#endif +#elif defined(HOST_LOONGARCH64) +#if defined(HOST_X86) +#error Cannot define both HOST_LOONGARCH64 and HOST_X86 +#endif +#if defined(HOST_AMD64) +#error Cannot define both HOST_LOONGARCH64 and HOST_AMD64 +#endif +#if defined(HOST_ARM) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM +#endif +#if defined(HOST_ARM64) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM64 +#endif #else #error Unsupported or unset host architecture #endif @@ -86,6 +111,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_X86 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_X86 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_AMD64) #if defined(TARGET_X86) #error Cannot define both TARGET_AMD64 and TARGET_X86 @@ -96,6 +124,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_AMD64 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_AMD64 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM and TARGET_X86 @@ -106,6 +137,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_ARM and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM64) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM64 and TARGET_X86 @@ -116,6 +150,22 @@ #if defined(TARGET_ARM) #error Cannot define both TARGET_ARM64 and TARGET_ARM #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM64 and TARGET_LOONGARCH64 +#endif +#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_X86) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_X86 +#endif +#if defined(TARGET_AMD64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_AMD64 +#endif +#if defined(TARGET_ARM) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM +#endif +#if defined(TARGET_ARM64) +#error Cannot 
define both TARGET_LOONGARCH64 and TARGET_ARM64 +#endif #else #error Unsupported or unset target architecture #endif @@ -163,6 +213,8 @@ #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 +#elif defined(TARGET_LOONGARCH64) +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0x6264 #else #error Unsupported or unset target architecture #endif @@ -207,6 +259,14 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +#define UNIX_LOONGARCH64_ONLY_ARG(x) , x +#define UNIX_LOONGARCH64_ONLY(x) x +#else // !TARGET_LOONGARCH64 +#define UNIX_LOONGARCH64_ONLY_ARG(x) +#define UNIX_LOONGARCH64_ONLY(x) +#endif // TARGET_LOONGARCH64 + #if defined(DEBUG) #define DEBUG_ARG_SLOTS #endif @@ -224,7 +284,7 @@ #define DEBUG_ARG_SLOTS_ASSERT(x) #endif -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define FEATURE_PUT_STRUCT_ARG_STK 1 #endif @@ -236,7 +296,7 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define MULTIREG_HAS_SECOND_GC_RET 1 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index f92329e46f053..f3bef3cf01475 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -557,6 +557,11 @@ CONFIG_STRING(JitFunctionFile, W("JitFunctionFile")) // of the frame) CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0) #endif // defined(TARGET_ARM64) + +#if defined(TARGET_LOONGARCH64) +// Disable emitDispIns by default +CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) +#endif // defined(TARGET_LOONGARCH64) #endif // DEBUG CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. 
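For reference, the base encodings in the instrsloongarch64.h table above are the instruction words with all register fields zeroed. Assuming the standard LoongArch64 3R field layout from the ISA manual (rd in bits [4:0], rj in bits [9:5], rk in bits [14:10]), an emitter forms a complete instruction word by OR-ing the register numbers into the base value. The helper below is only an illustrative sketch under that assumption, not code from this change:

// Illustrative sketch (not part of the patch): compose a 3R-format LoongArch64
// instruction word from a base encoding in the INST table and three register numbers.
static unsigned encodeFormat3R(unsigned baseEncoding, unsigned rd, unsigned rj, unsigned rk)
{
    // Register fields per the LoongArch64 ISA: rk at bits [14:10], rj at [9:5], rd at [4:0].
    return baseEncoding | (rk << 10) | (rj << 5) | rd;
}

// Example: encodeFormat3R(0x00108000 /* add.d base */, 12, 13, 14) == 0x0010b9ac,
// i.e. the word for "add.d $r12, $r13, $r14".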
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 9590279526924..3d338ff266df7 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -888,7 +888,7 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX); assert(ehGetDsc(finallyIndex)->HasFinallyHandler()); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion); #else *inTryRegion = true; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 8145cbcf2b6b6..4b2ce440126d9 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -659,7 +659,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un lvaSetClass(varDscInfo->varNum, clsHnd); } - // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers + // For ARM, ARM64, LOONGARCH64, and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); var_types origArgType = argType; @@ -813,6 +813,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } #else // !TARGET_ARM + #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -873,9 +874,101 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + var_types argRegTypeInStruct1 = TYP_UNKNOWN; + var_types argRegTypeInStruct2 = TYP_UNKNOWN; + + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); + } + + if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0) + { + assert(varTypeIsStruct(argType)); + int floatNum = 0; + if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + assert(argSize <= 8); + assert(varDsc->lvExactSize <= argSize); + + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + + argRegTypeInStruct1 = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + floatNum = 2; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) + { + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ?
TYP_LONG : TYP_INT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + + assert((floatNum == 1) || (floatNum == 2)); + + if (!canPassArgInRegisters) + { + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. + varDscInfo->setAllRegArgUsed(TYP_DOUBLE); + canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); + + argRegTypeInStruct1 = TYP_UNKNOWN; + argRegTypeInStruct2 = TYP_UNKNOWN; + + if (cSlotsToEnregister == 2) + { + if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1)) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. + argRegTypeInStruct1 = TYP_I_IMPL; + canPassArgInRegisters = true; + } + } + } + } + else +#endif // defined(TARGET_LOONGARCH64) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); +#if defined(TARGET_LOONGARCH64) + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. + if (!canPassArgInRegisters && varTypeIsFloating(argType)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + } + if (!canPassArgInRegisters && (cSlots > 1)) + { + // If a struct-arg which needs two registers but only one integer register available, + // it has to be split. + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); + argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + } +#endif } if (canPassArgInRegisters) @@ -905,7 +998,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + if (argRegTypeInStruct1 != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); + } + else +#endif // defined(TARGET_LOONGARCH64) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } @@ -953,6 +1052,40 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)); } } +#elif defined(TARGET_LOONGARCH64) + if (argType == TYP_STRUCT) + { + if (argRegTypeInStruct1 != TYP_UNKNOWN) + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)); + varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0; + if (argRegTypeInStruct2 != TYP_UNKNOWN) + { + unsigned secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)); + varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0; + } + else if (cSlots > 1) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. But we reserved extra 8-bytes for the whole struct. 
+ varDsc->lvIsSplit = 1; + varDsc->SetOtherArgReg(REG_STK); + varDscInfo->setAllRegArgUsed(argRegTypeInStruct1); +#if FEATURE_FASTTAILCALL + varDscInfo->stackArgSize += TARGET_POINTER_SIZE; +#endif + } + } + else + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL)); + if (cSlots == 2) + { + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); + } + } + } #else // ARM32 if (varTypeIsStruct(argType)) { @@ -1079,11 +1212,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDscInfo->setAnyFloatStackArgs(); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If we needed to use the stack in order to pass this argument then // record the fact that we have used up any remaining registers of this 'type' - // This prevents any 'backfilling' from occuring on ARM64 + // This prevents any 'backfilling' from occuring on ARM64/LoongArch64. // varDscInfo->setAllRegArgUsed(argType); @@ -1357,7 +1490,12 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #if defined(TARGET_AMD64) || defined(TARGET_ARM64) varDsc->lvIsImplicitByRef = 0; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + varDsc->lvIsImplicitByRef = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). @@ -1688,7 +1826,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * FP_REGSIZE_BYTES; #endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) #else // !FEATURE_SIMD - const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); + const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); #endif // !FEATURE_SIMD assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized @@ -1993,7 +2131,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. // TODO-ARM32 - HFA support should also eliminate the need for this. @@ -2010,7 +2148,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2326,7 +2464,7 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) compiler->compLongUsed = true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Reset the implicitByRef flag. 
fieldVarDsc->lvIsImplicitByRef = 0; @@ -2701,7 +2839,7 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) return true; } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (howToPassStruct == SPK_ByValue) { assert(type == TYP_STRUCT); @@ -2742,7 +2880,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; varDsc->lvType = impNormStructType(typeHnd, &simdBaseJitType); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Mark implicit byref struct parameters if (varDsc->lvIsParam && !varDsc->lvIsStructField) { @@ -2755,7 +2893,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool varDsc->lvIsImplicitByRef = 1; } } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if FEATURE_SIMD if (simdBaseJitType != CORINFO_TYPE_UNDEF) @@ -3733,20 +3871,20 @@ size_t LclVarDsc::lvArgStackSize() const #if defined(WINDOWS_AMD64_ABI) // Structs are either passed by reference or can be passed by value using one pointer stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // lvSize performs a roundup. stackSize = this->lvSize(); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) { // If the size is greater than 16 bytes then it will // be passed by reference. stackSize = TARGET_POINTER_SIZE; } -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI +#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 NYI("Unsupported target."); unreached(); @@ -5274,14 +5412,14 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 if (opts.IsOSR()) { @@ -5390,11 +5528,11 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/lr at the bottom, however, this will be - // below the alignment. So we should not apply the alignment adjustment to - // them. On ARM64 it turns out we always store these at +0 and +8 of the FP, + // arg space area. When we store fp/lr(ra) at the bottom, however, this will + // be below the alignment. So we should not apply the alignment adjustment to + // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. 
assert(codeGen->isFramePointerUsed()); @@ -5402,7 +5540,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#endif +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef TARGET_ARM @@ -5799,7 +5937,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, * when updating the current offset on the stack */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) #if DEBUG // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize // Also investigate why we are incrementing argOffs for X86 as this seems incorrect @@ -5907,6 +6045,17 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, varDsc->SetStackOffset(argOffs); argOffs += argSize; } + +#elif defined(TARGET_LOONGARCH64) + + if (varDsc->lvIsSplit) + { + assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK)); + // This is a split struct. It will account for an extra (8 bytes) for the whole struct. + varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); + argOffs += TARGET_POINTER_SIZE; + } + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6187,7 +6336,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } -#else // !TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + + // Subtract off FP and RA. + assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; + +#else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6198,7 +6353,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_ARM64 +#endif // !TARGET_LOONGARCH64 // (2) Account for the remainder of the frame // @@ -6284,7 +6439,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_AMD64 -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including @@ -6293,7 +6448,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (mustDoubleAlign) { @@ -6792,7 +6947,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If we have an incoming register argument that has a struct promoted field // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // @@ -6818,7 +6973,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaTable[fieldVarNum + 1].SetStackOffset(varDsc->GetStackOffset() + 4); } #endif // TARGET_ARM -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } } @@ -6923,6 +7078,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 +#if 
defined(TARGET_LOONGARCH64) + assert(isFramePointerUsed()); // Note that currently we always have a frame pointer + stkOffs -= 2 * REGSIZE_BYTES; +#endif // TARGET_LOONGARCH64 + #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { @@ -7126,9 +7286,9 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(REGSIZE_BYTES); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // The stack on ARM64 must be 16 byte aligned. + // The stack on ARM64/LoongArch64 must be 16 byte aligned. // First, align up to 8. if ((compLclFrameSize % 8) != 0) @@ -7793,11 +7953,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) compCalleeRegsPushed = CNT_CALLEE_SAVED; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters + compCalleeRegsPushed++; // we always push LR/RA. See genPushCalleeSavedRegisters #elif defined(TARGET_AMD64) if (compFloatingPointUsed) { @@ -7829,12 +7989,12 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) lvaAssignFrameOffsets(curState); unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) { calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR/RA. See genPushCalleeSavedRegisters #endif result = compLclFrameSize + calleeSavedRegMaxSz; @@ -8146,13 +8306,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* // Calculate padding unsigned padding = LCL_FLD_PADDING(lclNum); -#ifdef TARGET_ARMARCH - // We need to support alignment requirements to access memory on ARM ARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) + // We need to support alignment requirements to access memory. 
unsigned alignment = 1; pComp->codeGen->InferOpSizeAlign(lcl, &alignment); alignment = roundUp(alignment, TARGET_POINTER_SIZE); padding = roundUp(padding, alignment); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // Change the variable to a TYP_BLK if (varType != TYP_BLK) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5951452102a4e..5bf6eca5c3997 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -254,7 +254,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCast(node); break; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_BOUNDS_CHECK: ContainCheckBoundsChk(node->AsBoundsChk()); break; @@ -281,7 +281,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); @@ -361,7 +361,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); break; @@ -389,7 +389,7 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. case GT_OBJ: if (node->AsObj()->Addr()->OperIsLocalAddr()) @@ -1080,7 +1080,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf bool isOnStack = (info->GetRegNum() == REG_STK); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Mark contained when we pass struct // GT_FIELD_LIST is always marked contained when it is generated if (type == TYP_STRUCT) @@ -1461,7 +1461,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // !defined(TARGET_64BIT) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (call->IsVarargs() || comp->opts.compUseSoftFP) { // For vararg call or on armel, reg args should be all integer. @@ -1472,7 +1472,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) type = newNode->TypeGet(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 GenTree* putArg = NewPutArg(call, arg, info, type); @@ -1486,9 +1486,9 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) } } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm platform. +// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform. // // Arguments: // arg - The arg node @@ -3189,7 +3189,7 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) // GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) GenTree* relop = jtrue->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); @@ -3198,6 +3198,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) bool useJCMP = false; GenTreeFlags flags = GTF_EMPTY; +#if defined(TARGET_LOONGARCH64) + if (relop->OperIs(GT_EQ, GT_NE)) + { + // Codegen will use beq or bne. 
+ flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; + useJCMP = true; + } +#else // TARGET_ARM64 if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) { // Codegen will use cbz or cbnz in codegen which do not affect the flag register @@ -3210,6 +3218,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : GTF_EMPTY); useJCMP = true; } +#endif // TARGET_ARM64 if (useJCMP) { @@ -3226,7 +3235,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -3933,10 +3942,16 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) assert(!call->HasMultiRegRetVal()); const ClassLayout* layout = store->GetLayout(); - const var_types regType = layout->GetRegisterType(); + var_types regType = layout->GetRegisterType(); if (regType != TYP_UNDEF) { +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(call->TypeGet())) + { + regType = call->TypeGet(); + } +#endif store->ChangeType(regType); store->SetOper(GT_STOREIND); LowerStoreIndirCommon(store->AsStoreInd()); @@ -5485,7 +5500,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#ifndef TARGET_ARMARCH +#ifdef TARGET_XARCH if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. @@ -5496,7 +5511,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false, parent); } } -#endif // !TARGET_ARMARCH +#endif // TARGET_XARCH } if (node->OperIs(GT_ADD)) @@ -5613,7 +5628,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && (divisorValue >= 3)) { size_t magic; @@ -5693,7 +5708,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5901,7 +5916,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) return nullptr; } -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ssize_t magic; int shift; diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 1a0bb8b5ed992..09d716ac1b5c7 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -161,7 +161,7 @@ class Lowering final : public Phase void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type); void LowerArg(GenTreeCall* call, GenTree** ppTree); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) GenTree* LowerFloatArg(GenTree** pArg, fgArgTabEntry* info); GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); #endif diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp new file mode 100644 index 0000000000000..78ac528ba4c64 --- /dev/null +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -0,0 +1,829 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for LOONGARCH64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 // This file is ONLY used for LOONGARCH64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +#endif + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? +// +// Return Value: +// True if the addr fits into the range. +// +bool Lowering::IsCallTargetInRange(void* addr) +{ + // TODO-LOONGARCH64-CQ: using B/BL for optimization. + return false; +} + +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. +// +bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const +{ + if (!varTypeIsFloating(parentNode->TypeGet())) + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->AsIntCon()->ImmedValNeedsReloc(comp)) + return false; + + // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. + target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; + + switch (parentNode->OperGet()) + { + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: + NYI_LOONGARCH64("GT_CMPXCHG,GT_LOCKADD,GT_XADD"); + break; + + case GT_ADD: + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_BOUNDS_CHECK: + return emitter::isValidSimm12(immVal); + case GT_AND: + case GT_OR: + case GT_XOR: + return emitter::isValidUimm12(immVal); + case GT_JCMP: + assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); + return true; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + if (immVal == 0) + return true; + break; + + default: + break; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. +// +// Performs contaiment checks. +// +// TODO-LoongArch64-CQ: recognize GT_MULs that can be turned into MUL_LONGs, +// as those are cheaper. +// +// Arguments: +// mul - The node to lower +// +// Return Value: +// The next node to lower. +// +GenTree* Lowering::LowerMul(GenTreeOp* mul) +{ + assert(mul->OperIsMul()); + + ContainCheckMul(mul); + + return mul->gtNext; +} + +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
+// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + // Try to widen the ops if they are going into a local var. + GenTree* op1 = storeLoc->gtGetOp1(); + if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->GetLclNum(); + LclVarDsc* varDsc = comp->lvaGetDesc(varNum); + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } + if (storeLoc->OperIs(GT_STORE_LCL_FLD)) + { + // We should only encounter this for lclVars that are lvDoNotEnregister. + verifyLclFldDoNotEnregister(storeLoc->GetLclNum()); + } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. +// +void Lowering::LowerStoreIndir(GenTreeStoreInd* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. 
+// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + blkNode->SetOper(GT_STORE_BLK); + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; + if (fill == 0) + { + src->SetContained(); + } + else if (size >= REGSIZE_BYTES) + { + fill *= 0x0101010101010101LL; + src->gtType = TYP_LONG; + } + else + { + fill *= 0x01010101; + } + src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD)); + src->SetContained(); + + if (src->OperIs(GT_IND)) + { + // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained. + // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the + // address, not knowing that GT_IND is part of a block op that has containment restrictions. + src->AsIndir()->Addr()->ClearContained(); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp)); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + if (!blkNode->AsObj()->GetLayout()->HasGCPtr()) + { + blkNode->SetOper(GT_STORE_BLK); + } + else if (dstAddr->OperIsLocalAddr() && (size <= CPBLK_UNROLL_LIMIT)) + { + // If the size is small enough to unroll then we need to mark the block as non-interruptible + // to actually allow unrolling. The generated code does not report GC references loaded in the + // temporary register(s) used for copying. + blkNode->SetOper(GT_STORE_BLK); + blkNode->gtBlkOpGcUnsafe = true; + } + } + + // CopyObj or CopyBlk + if (blkNode->OperIs(GT_STORE_OBJ)) + { + assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////// + else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + if (src->OperIs(GT_IND)) + { + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); + } + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } +} + +//------------------------------------------------------------------------ +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. 
+//
+// Arguments:
+//    blkNode - the block store node
+//    size - the block size
+//    addr - the address node to try to contain
+//
+void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr)
+{
+    assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
+    assert(size < INT32_MAX);
+
+    if (addr->OperIsLocalAddr())
+    {
+        addr->SetContained();
+        return;
+    }
+
+    if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT))
+    {
+        return;
+    }
+
+    GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon();
+    ssize_t        offset     = offsetNode->IconValue();
+
+    // TODO-LoongArch64: not including the ldptr and SIMD offset which not used right now.
+    if (!emitter::isValidSimm12(offset) || !emitter::isValidSimm12(offset + static_cast<int>(size)))
+    {
+        return;
+    }
+
+    if (!IsSafeToContainMem(blkNode, addr))
+    {
+        return;
+    }
+
+    BlockRange().Remove(offsetNode);
+
+    addr->ChangeOper(GT_LEA);
+    addr->AsAddrMode()->SetIndex(nullptr);
+    addr->AsAddrMode()->SetScale(0);
+    addr->AsAddrMode()->SetOffset(static_cast<int>(offset));
+    addr->SetContained();
+}
+
+//------------------------------------------------------------------------
+// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
+//
+// Arguments:
+//    tree - GT_CAST node to be lowered
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    Casts from float/double to a smaller int type are transformed as follows:
+//    GT_CAST(float/double, byte)   = GT_CAST(GT_CAST(float/double, int32), byte)
+//    GT_CAST(float/double, sbyte)  = GT_CAST(GT_CAST(float/double, int32), sbyte)
+//    GT_CAST(float/double, int16)  = GT_CAST(GT_CAST(double/double, int32), int16)
+//    GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16)
+//
+//    Note that for the overflow conversions we still depend on helper calls and
+//    don't expect to see them here.
+//    i) GT_CAST(float/double, int type with overflow detection)
+//
+
+void Lowering::LowerCast(GenTree* tree)
+{
+    assert(tree->OperGet() == GT_CAST);
+
+    JITDUMP("LowerCast for: ");
+    DISPNODE(tree);
+    JITDUMP("\n");
+
+    GenTree*  op1     = tree->AsOp()->gtOp1;
+    var_types dstType = tree->CastToType();
+    var_types srcType = genActualType(op1->TypeGet());
+
+    if (varTypeIsFloating(srcType))
+    {
+        noway_assert(!tree->gtOverflow());
+        assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small
+                                          // int.
+    }
+
+    assert(!varTypeIsSmall(srcType));
+
+    // Now determine if we have operands that should be contained.
+    ContainCheckCast(tree->AsCast());
+}
+
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+//    tree - the node to lower
+//
+// Return Value:
+//    None.
+//
+void Lowering::LowerRotate(GenTree* tree)
+{
+    if (tree->OperGet() == GT_ROL)
+    {
+        // Convert ROL into ROR.
+ GenTree* rotatedValue = tree->AsOp()->gtOp1; + unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + GenTree* rotateLeftIndexNode = tree->AsOp()->gtOp2; + + if (rotateLeftIndexNode->IsCnsIntOrI()) + { + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; + } + else + { + GenTree* tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); + BlockRange().InsertAfter(rotateLeftIndexNode, tmp); + tree->AsOp()->gtOp2 = tmp; + } + tree->ChangeOper(GT_ROR); + } + ContainCheckShiftRotate(tree->AsOp()); +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ + NYI_LOONGARCH64("LowerSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsic"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::IsValidConstForMovImm: Determines if the given node can be replaced by a mov/fmov immediate instruction +// +// Arguments: +// node - The hardware intrinsic node. +// +// Returns: +// true if the node can be replaced by a mov/fmov immediate instruction; otherwise, false +// +// IMPORTANT: +// This check may end up modifying node->gtOp1 if it is a cast node that can be removed +bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("IsValidConstForMovImm"); + return false; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NYI_LOONGARCH64("LowerHWIntrinsicCmpOp"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsicCreate"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicDot: Lowers a Vector64 or Vector128 Dot call +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsicDot"); +} + +#endif // FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Containment analysis +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// ContainCheckCallOperands: Determine whether operands of a call should be contained. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::ContainCheckCallOperands(GenTreeCall* call) +{ + // There are no contained operands for LoongArch64. +} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) +{ + GenTree* src = node->Data(); + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); + } + + ContainCheckIndir(node); +} + +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. +// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ + // If this is the rhs of a block copy it will be handled when we handle the store. + if (indirNode->TypeGet() == TYP_STRUCT) + { + return; + } + +#ifdef FEATURE_SIMD + NYI_LOONGARCH64("ContainCheckIndir-SIMD"); +#endif // FEATURE_SIMD + + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + { + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // These nodes go into an addr mode: + // - GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR is a stack addr mode. + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_CLS_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } +} + +//------------------------------------------------------------------------ +// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckBinary(GenTreeOp* node) +{ + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(node, node->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckMul: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckMul(GenTreeOp* node) +{ + ContainCheckBinary(node); +} + +//------------------------------------------------------------------------ +// ContainCheckDivOrMod: determine which operands of a div/mod should be contained. 
+// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckDivOrMod(GenTreeOp* node) +{ + assert(node->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); +} + +//------------------------------------------------------------------------ +// ContainCheckShiftRotate: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckShiftRotate(GenTreeOp* node) +{ + GenTree* shiftBy = node->gtOp2; + assert(node->OperIsShiftOrRotate()); + + if (shiftBy->IsCnsIntOrI()) + { + MakeSrcContained(node, shiftBy); + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreLoc: determine whether the source of a STORE_LCL* should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const +{ + assert(storeLoc->OperIsLocalStore()); + GenTree* op1 = storeLoc->gtGetOp1(); + + if (op1->OperIs(GT_BITCAST)) + { + // If we know that the source of the bitcast will be in a register, then we can make + // the bitcast itself contained. This will allow us to store directly from the other + // type if this node doesn't get a register. + GenTree* bitCastSrc = op1->gtGetOp1(); + if (!bitCastSrc->isContained() && !bitCastSrc->IsRegOptional()) + { + op1->SetContained(); + return; + } + } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + { + // For an InitBlk we want op1 to be contained + MakeSrcContained(storeLoc, op1); + if (op1->IsSIMDZero()) + { + MakeSrcContained(op1, op1->gtGetOp1()); + } + } + return; + } +#endif // FEATURE_SIMD + if (IsContainableImmed(storeLoc, op1)) + { + MakeSrcContained(storeLoc, op1); + } + + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + var_types type = varDsc->GetRegisterType(storeLoc); + if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) + { + MakeSrcContained(storeLoc, op1); + } +} + +//------------------------------------------------------------------------ +// ContainCheckCast: determine whether the source of a CAST node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCast(GenTreeCast* node) +{ + // There are no contained operands for LoongArch64. +} + +//------------------------------------------------------------------------ +// ContainCheckCompare: determine whether the sources of a compare node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCompare(GenTreeOp* cmp) +{ + CheckImmedAndMakeContained(cmp, cmp->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckBoundsChk: determine whether any source of a bounds check node should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) +{ + assert(node->OperIs(GT_BOUNDS_CHECK)); + if (!CheckImmedAndMakeContained(node, node->GetIndex())) + { + CheckImmedAndMakeContained(node, node->GetArrayLength()); + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) +{ + NYI_LOONGARCH64("ContainCheckSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("ContainCheckHWIntrinsic"); +} +#endif // FEATURE_HW_INTRINSICS + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2a85c814adcb6..dbebd44596218 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -703,8 +703,10 @@ LinearScan::LinearScan(Compiler* theCompiler) enregisterLocalVars = compiler->compEnregLocals(); #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); +#elif TARGET_LOONGARCH64 + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -1571,11 +1573,19 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: + { // TODO-1stClassStructs: support vars with GC pointers. The issue is that such // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. - return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args within a struct maybe passed by integer register + // when no float register left but free integer register. + isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); +#endif + return isRegCandidate; + } case TYP_UNDEF: case TYP_UNKNOWN: @@ -2576,7 +2586,7 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Determine whether we need to reserve a register for large lclVar offsets. 
if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT)) { @@ -2586,7 +2596,7 @@ void LinearScan::setFrameType() JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); removeMask |= RBM_OPT_RSVD; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0)) { @@ -2652,11 +2662,24 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* assert(refPosition->getInterval() == currentInterval); RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; - +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. + if ((candidates & allRegs(regType)) != RBM_NONE) + { + return regType; + } + else + { + assert((regType == TYP_DOUBLE) || (regType == TYP_FLOAT)); + assert((candidates & allRegs(TYP_I_IMPL)) != RBM_NONE); + return TYP_I_IMPL; + } +#else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; +#endif } - //------------------------------------------------------------------------ // isMatchingConstant: Check to see whether a given register contains the constant referenced // by the given RefPosition @@ -7684,7 +7707,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Next, if this blocks ends with a JCMP, we have to make sure: // 1. Not to copy into the register that JCMP uses // e.g. JCMP w21, BRANCH @@ -7722,7 +7745,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; regMaskTP sameWriteRegs = RBM_NONE; @@ -7797,12 +7820,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex)) { sameToReg = REG_NA; } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If the var is live only at those blocks connected by a split edge and not live-in at some of the // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 56f00145b68ba..86978763a2bf3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -762,6 +762,9 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +#elif defined(TARGET_LOONGARCH64) + static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -1003,13 +1006,16 @@ class LinearScan : public LinearScanInterface bool isUse); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For AMD64 on SystemV machines. This method // is called as replacement for raUpdateRegStateForArg // that is used on Windows. 
On System V systems a struct can be passed // partially using registers from the 2 register files. - void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) + // + // For LoongArch64's ABI, a struct can be passed + // partially using registers from the 2 register files. + void UpdateRegStateForStructArg(LclVarDsc* argDsc); +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 04df846e367e7..6d2a83a338284 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,7 +593,14 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. + assert((regType(theInterval->registerType) == FloatRegisterType) || + (allRegs(theInterval->registerType) & mask) != 0); +#else assert((allRegs(theInterval->registerType) & mask) != 0); +#endif } RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -1987,9 +1994,11 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. +// UpdateRegStateForStructArg: +// Sets the register state for an argument of type STRUCT. +// This is shared between with AMD64's SystemV systems and LoongArch64-ABI. // // Arguments: // argDsc - the LclVarDsc for the argument of interest @@ -1998,7 +2007,7 @@ void LinearScan::insertZeroInitRefPositions() // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp // for how state for argument is updated for unix non-structs and Windows AMD64 structs. // -void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) { assert(varTypeIsStruct(argDsc)); RegState* intRegState = &compiler->codeGen->intRegState; @@ -2033,7 +2042,7 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) } } -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate @@ -2056,15 +2065,15 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) // void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { -#if defined(UNIX_AMD64_ABI) - // For System V AMD64 calls the argDsc can have 2 registers (for structs.) - // Handle them here. +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) + // For SystemV-AMD64 and LoongArch64 calls the argDsc + // can have 2 registers (for structs.). Handle them here. 
if (varTypeIsStruct(argDsc)) { - unixAmd64UpdateRegStateForArg(argDsc); + UpdateRegStateForStructArg(argDsc); } else -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; @@ -3976,6 +3985,13 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; +#elif defined(TARGET_LOONGARCH64) + // the 'addr' goes into t6 (REG_WRITE_BARRIER_DST) + // the 'src' goes into t7 (REG_WRITE_BARRIER_SRC) + // + addrCandidates = RBM_WRITE_BARRIER_DST; + srcCandidates = RBM_WRITE_BARRIER_SRC; + #elif defined(TARGET_X86) && NOGC_WRITE_BARRIERS bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp new file mode 100644 index 0000000000000..2f259f7efffbd --- /dev/null +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -0,0 +1,1325 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for LOONGARCH64 XX +XX XX +XX This encapsulates all the logic for setting register requirements for XX +XX the LOONGARCH64 architecture. XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" + +//------------------------------------------------------------------------ +// BuildNode: Build the RefPositions for for a node +// +// Arguments: +// treeNode - the node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Preconditions: +// LSRA Has been initialized. +// +// Postconditions: +// RefPositions have been built for all the register defs and uses required +// for this node. +// +int LinearScan::BuildNode(GenTree* tree) +{ + assert(!tree->isContained()); + int srcCount = 0; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; + regMaskTP killMask = RBM_NONE; + bool isLocalDefUse = false; + + // Reset the build-related members of LinearScan. + clearBuildState(); + + // Set the default dstCount. This may be modified below. + if (tree->IsValue()) + { + dstCount = 1; + if (tree->IsUnusedValue()) + { + isLocalDefUse = true; + } + } + else + { + dstCount = 0; + } + + switch (tree->OperGet()) + { + default: + srcCount = BuildSimple(tree); + break; + + case GT_LCL_VAR: + // We make a final determination about whether a GT_LCL_VAR is a candidate or contained + // after liveness. In either case we don't build any uses or defs. Otherwise, this is a + // load of a stack-based local into a register and we'll fall through to the general + // local case below. + if (checkContainedOrCandidateLclVar(tree->AsLclVar())) + { + return 0; + } + FALLTHROUGH; + case GT_LCL_FLD: + { + srcCount = 0; +#ifdef FEATURE_SIMD + // Need an additional register to read upper 4 bytes of Vector3. 
+ if (tree->TypeGet() == TYP_SIMD12) + { + // We need an internal register different from targetReg in which 'tree' produces its result + // because both targetReg and internal reg will be in use at the same time. + buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + } +#endif + BuildDef(tree); + } + break; + + case GT_STORE_LCL_VAR: + if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) + { + dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt; + } + FALLTHROUGH; + + case GT_STORE_LCL_FLD: + srcCount = BuildStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_FIELD_LIST: + // These should always be contained. We don't correctly allocate or + // generate code for a non-contained GT_FIELD_LIST. + noway_assert(!"Non-contained GT_FIELD_LIST"); + srcCount = 0; + break; + + case GT_ARGPLACE: + case GT_NO_OP: + case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_PROF_HOOK: + srcCount = 0; + assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_START_PREEMPTGC: + // This kills GC refs in callee save regs + srcCount = 0; + assert(dstCount == 0); + BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + break; + + case GT_CNS_DBL: + { + // There is no instruction for loading float/double imm directly into FPR. + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + FALLTHROUGH; + + case GT_CNS_INT: + { + srcCount = 0; + assert(dstCount == 1); + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + } + break; + + case GT_BOX: + case GT_COMMA: + case GT_QMARK: + case GT_COLON: + case GT_CLS_VAR: + case GT_ADDR: + srcCount = 0; + assert(dstCount == 0); + unreached(); + break; + + case GT_RETURN: + srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_RETFILT: + assert(dstCount == 0); + if (tree->TypeGet() == TYP_VOID) + { + srcCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + srcCount = 1; + BuildUse(tree->gtGetOp1(), RBM_INTRET); + } + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child. + srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) + { + assert(dstCount == 1); + BuildDef(tree); + } + else + { + assert(dstCount == 0); + } + break; + + case GT_KEEPALIVE: + assert(dstCount == 0); + srcCount = BuildOperandUses(tree->gtGetOp1()); + break; + + case GT_JTRUE: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_JMP: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. 
+ srcCount = 0; + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_SWITCH_TABLE: + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 0); + break; + + case GT_ASG: + noway_assert(!"We should never hit any assignment operator in lowering"); + srcCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); + } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_AND: + case GT_AND_NOT: + case GT_OR: + case GT_XOR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_RETURNTRAP: + // this just turns into a compare of its child with an int + // + a conditional call + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + } + break; + + case GT_INTRINSIC: + { + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); + + // Both operand and its result must be of the same floating point type. 
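+            // Illustrative note (an assumption about the matching codegen, not stated in this
+            // change): each of these math intrinsics is expected to be handled by the FP unit
+            // directly, e.g. NI_System_Math_Sqrt via fsqrt.d/fsqrt.s and NI_System_Math_Abs via
+            // fabs.d/fabs.s, which is why a single use and a single def with no internal
+            // registers are built here.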
+            GenTree* op1 = tree->gtGetOp1();
+            assert(varTypeIsFloating(op1));
+            assert(op1->TypeGet() == tree->TypeGet());
+
+            BuildUse(op1);
+            srcCount = 1;
+            assert(dstCount == 1);
+            BuildDef(tree);
+        }
+        break;
+
+#ifdef FEATURE_SIMD
+        case GT_SIMD:
+            srcCount = BuildSIMD(tree->AsSIMD());
+            break;
+#endif // FEATURE_SIMD
+
+#ifdef FEATURE_HW_INTRINSICS
+        case GT_HWINTRINSIC:
+            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
+            break;
+#endif // FEATURE_HW_INTRINSICS
+
+        case GT_CAST:
+            assert(dstCount == 1);
+            srcCount = BuildCast(tree->AsCast());
+            break;
+
+        case GT_NEG:
+        case GT_NOT:
+            BuildUse(tree->gtGetOp1());
+            srcCount = 1;
+            assert(dstCount == 1);
+            BuildDef(tree);
+            break;
+
+        case GT_EQ:
+        case GT_NE:
+        case GT_LT:
+        case GT_LE:
+        case GT_GE:
+        case GT_GT:
+        case GT_JCMP:
+            srcCount = BuildCmp(tree);
+            break;
+
+        case GT_CKFINITE:
+            srcCount = 1;
+            assert(dstCount == 1);
+            buildInternalIntRegisterDefForNode(tree);
+            BuildUse(tree->gtGetOp1());
+            BuildDef(tree);
+            buildInternalRegisterUses();
+            break;
+
+        case GT_CMPXCHG:
+        {
+            NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----");
+        }
+        break;
+
+        case GT_LOCKADD:
+        case GT_XORR:
+        case GT_XAND:
+        case GT_XADD:
+        case GT_XCHG:
+        {
+            NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----");
+        }
+        break;
+
+#if FEATURE_ARG_SPLIT
+        case GT_PUTARG_SPLIT:
+            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
+            dstCount = tree->AsPutArgSplit()->gtNumRegs;
+            break;
+#endif // FEATURE_ARG_SPLIT
+
+        case GT_PUTARG_STK:
+            srcCount = BuildPutArgStk(tree->AsPutArgStk());
+            break;
+
+        case GT_PUTARG_REG:
+            srcCount = BuildPutArgReg(tree->AsUnOp());
+            break;
+
+        case GT_CALL:
+            srcCount = BuildCall(tree->AsCall());
+            if (tree->AsCall()->HasMultiRegRetVal())
+            {
+                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+            }
+            break;
+
+        case GT_BLK:
+            // These should all be eliminated prior to Lowering.
+            assert(!"Non-store block node in Lowering");
+            srcCount = 0;
+            break;
+
+        case GT_STORE_BLK:
+        case GT_STORE_OBJ:
+        case GT_STORE_DYN_BLK:
+            srcCount = BuildBlockStore(tree->AsBlk());
+            break;
+
+        case GT_INIT_VAL:
+            // Always a passthrough of its child's value.
+            assert(!"INIT_VAL should always be contained");
+            srcCount = 0;
+            break;
+
+        case GT_LCLHEAP:
+        {
+            assert(dstCount == 1);
+
+            // Need a variable number of temp regs (see genLclHeap() in codegenloongarch64.cpp):
+            // Here '-' means don't care.
+            //
+            //  Size?                      Init Memory?    # temp regs
+            //   0                              -               0
+            //   const and <=UnrollLimit        -               0
+            //   const and >UnrollLimit         Yes             0
+            //   Non-const                      Yes             0
+            //   Non-const                      No              2
+            //
+
+            GenTree* size = tree->gtGetOp1();
+            if (size->IsCnsIntOrI())
+            {
+                assert(size->isContained());
+                srcCount = 0;
+
+                size_t sizeVal = size->AsIntCon()->gtIconVal;
+
+                if (sizeVal != 0)
+                {
+                    // Compute the amount of memory to properly STACK_ALIGN.
+                    // Note: The GenTree node is not updated here as it is cheap to recompute stack aligned size.
+                    // This should also help in debugging as we can examine the original size specified with
+                    // localloc.
+                    sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+
+                    // For small allocations up to 4 'st' instructions (i.e. 16 to 64 bytes of localloc)
+                    // TODO-LoongArch64: maybe use paired load/store or SIMD in future.
+                    if (sizeVal <= (REGSIZE_BYTES * 2 * 4))
+                    {
+                        // Need no internal registers
+                    }
+                    else if (!compiler->info.compInitMem)
+                    {
+                        // No need to initialize allocated stack space.
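+                        // Illustrative summary (restating the table above, not new behavior): a
+                        // constant size below the OS page size needs no probing and thus no temps,
+                        // while a larger constant or a non-constant size without compInitMem needs
+                        // two internal registers so codegen can probe the stack one page at a time.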
+ if (sizeVal < compiler->eeGetPageSize()) + { + // Need no internal registers + } + else + { + // We need two registers: regCnt and RegTmp + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + } + } + else + { + srcCount = 1; + if (!compiler->info.compInitMem) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + + if (!size->isContained()) + { + BuildUse(size); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_BOUNDS_CHECK: + { + GenTreeBoundsChk* node = tree->AsBoundsChk(); + // Consumes arrLen & index - has no result + assert(dstCount == 0); + srcCount = BuildOperandUses(node->GetIndex()); + srcCount += BuildOperandUses(node->GetArrayLength()); + } + break; + + case GT_ARR_ELEM: + // These must have been lowered to GT_ARR_INDEX + noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_ARR_INDEX: + { + srcCount = 2; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple + // times while the result is being computed. + RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); + setDelayFree(arrObjUse); + BuildUse(tree->AsArrIndex()->IndexExpr()); + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_OFFSET: + // This consumes the offset, if any, the arrObj and the effective index, + // and produces the flattened offset for this dimension. + srcCount = 2; + if (!tree->AsArrOffs()->gtOffset->isContained()) + { + BuildUse(tree->AsArrOffs()->gtOffset); + srcCount++; + } + BuildUse(tree->AsArrOffs()->gtIndex); + BuildUse(tree->AsArrOffs()->gtArrObj); + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + int cns = lea->Offset(); + + // This LEA is instantiating an address, so we set up the srcCount here. + srcCount = 0; + if (base != nullptr) + { + srcCount++; + BuildUse(base); + } + if (index != nullptr) + { + srcCount++; + BuildUse(index); + } + assert(dstCount == 1); + + // On LOONGARCH64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the add instruction, so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_STOREIND: + { + assert(dstCount == 0); + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) + { + srcCount = BuildGCWriteBarrier(tree); + break; + } + + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) + { + BuildUse(tree->gtGetOp2()); + srcCount++; + } + } + break; + + case GT_NULLCHECK: + case GT_IND: + assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 
0 : 1)); + srcCount = BuildIndir(tree->AsIndir()); + break; + + case GT_CATCH_ARG: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_EXCEPTION_OBJECT); + break; + + case GT_INDEX_ADDR: + assert(dstCount == 1); + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + } // end switch (tree->OperGet()) + + if (tree->IsUnusedValue() && (dstCount != 0)) + { + isLocalDefUse = true; + } + // We need to be sure that we've set srcCount and dstCount appropriately + assert((dstCount < 2) || tree->IsMultiRegNode()); + assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); + assert(!tree->IsUnusedValue() || (dstCount != 0)); + assert(dstCount == tree->GetRegisterDstCount(compiler)); + return srcCount; +} + +#ifdef FEATURE_SIMD +//------------------------------------------------------------------------ +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. +// +// Arguments: +// tree - The GT_SIMD node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) +{ + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +//------------------------------------------------------------------------ +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. +// +// Arguments: +// tree - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) +{ + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; +} +#endif + +//------------------------------------------------------------------------ +// BuildIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or block gentree node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildIndir(GenTreeIndir* indirTree) +{ + // struct typed indirs are expected only on rhs of a block copy, + // but in this case they must be contained. + assert(indirTree->TypeGet() != TYP_STRUCT); + + GenTree* addr = indirTree->Addr(); + GenTree* index = nullptr; + int cns = 0; + + if (addr->isContained()) + { + if (addr->OperGet() == GT_LEA) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On LOONGARCH we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + } + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(indirTree); + } + } + +#ifdef FEATURE_SIMD + if (indirTree->TypeGet() == TYP_SIMD12) + { + // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). + assert(!addr->isContained()); + + // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. 
+ // To assemble the vector properly we would need an additional int register + buildInternalIntRegisterDefForNode(indirTree); + } +#endif // FEATURE_SIMD + + int srcCount = BuildIndirUses(indirTree); + buildInternalRegisterUses(); + + if (!indirTree->OperIs(GT_STOREIND, GT_NULLCHECK)) + { + BuildDef(indirTree); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildCall(GenTreeCall* call) +{ + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; + + int srcCount = 0; + int dstCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + dstCount = 1; + } + } + + GenTree* ctrlExpr = call->gtControlExpr; + regMaskTP ctrlExprCandidates = RBM_NONE; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + // Fast tail call - make sure that call target is always computed in volatile registers + // that will not be overridden by epilog sequence. + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); + } + } + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. + regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); + } + + RegisterType registerType = call->TypeGet(); + + // Set destination candidates for return value of the call. + + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + dstCandidates = retTypeDesc->GetABIReturnRegs(); + } + else if (varTypeUsesFloatArgReg(registerType)) + { + dstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + dstCandidates = RBM_LNGRET; + } + else + { + dstCandidates = RBM_INTRET; + } + + // First, count reg args + // Each register argument corresponds to one source. + bool callHasFloatRegArgs = false; + + for (GenTreeCall::Use& arg : call->LateArgs()) + { + GenTree* argNode = arg.GetNode(); + +#ifdef DEBUG + // During Build, we only use the ArgTabEntry for validation, + // as getting it is rather expensive. 
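+        // Illustrative note (assumes the standard LoongArch64 LP64D convention, matching the
+        // intArgRegs/fltArgRegs tables in targetloongarch64.cpp): by this point every late arg
+        // already has its register assigned, e.g. the first integer/pointer args in a0..a7 and
+        // the first floating-point args in f0..f7, so the code below only builds uses fixed to
+        // those precomputed registers.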
+ fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + regNumber argReg = curArgTabEntry->GetRegNum(); + assert(curArgTabEntry != nullptr); +#endif + + if (argNode->gtOper == GT_PUTARG_STK) + { + // late arg that is not passed in a register + assert(curArgTabEntry->GetRegNum() == REG_STK); + // These should never be contained. + assert(!argNode->isContained()); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + assert(argNode->isContained()); + + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { +#ifdef DEBUG + assert(use.GetNode()->OperIs(GT_PUTARG_REG)); + assert(use.GetNode()->GetRegNum() == argReg); + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); +#endif + BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + srcCount++; + } + } +#if FEATURE_ARG_SPLIT + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == curArgTabEntry->numRegs); + for (unsigned int i = 0; i < regCount; i++) + { + BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + } + srcCount += regCount; + } +#endif // FEATURE_ARG_SPLIT + else + { + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->GetRegNum() == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + { + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + srcCount++; + } + } + } + +#ifdef DEBUG + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* arg = use.GetNode(); + + // Skip arguments that have been moved to the Late Arg list + if ((arg->gtFlags & GTF_LATE_ARG) == 0) + { + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry != nullptr); +#if FEATURE_ARG_SPLIT + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. + assert(arg->OperGet() != GT_PUTARG_SPLIT); +#endif // FEATURE_ARG_SPLIT + if (arg->gtOper == GT_PUTARG_STK) + { + assert(curArgTabEntry->GetRegNum() == REG_STK); + } + else + { + assert(!arg->IsValue() || arg->IsUnusedValue()); + } + } + } +#endif // DEBUG + + // If it is a fast tail call, it is already preferenced to use IP0. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by LOONGARCH64 ABI. + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + } + + if (ctrlExpr != nullptr) + { + BuildUse(ctrlExpr, ctrlExprCandidates); + srcCount++; + } + + buildInternalRegisterUses(); + + // Now generate defs and kills. 
+ regMaskTP killMask = getKillSetForCall(call); + BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTree* putArgChild = argNode->gtGetOp1(); + + int srcCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperIs(GT_FIELD_LIST)) + { + assert(putArgChild->isContained()); + // We consume all of the items in the GT_FIELD_LIST + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + BuildUse(use.GetNode()); + srcCount++; + } + } + else + { + // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. + buildInternalIntRegisterDefForNode(argNode); + buildInternalIntRegisterDefForNode(argNode); + + if (putArgChild->OperGet() == GT_OBJ) + { + assert(putArgChild->isContained()); + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation, and there are no source registers. + // + assert(objChild->isContained()); + } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + srcCount = BuildOperandUses(objChild); + } + } + else + { + // No source registers. + putArgChild->OperIs(GT_LCL_VAR); + } + } + } + else + { + assert(!putArgChild->isContained()); + srcCount = BuildOperandUses(putArgChild); + } + buildInternalRegisterUses(); + return srcCount; +} + +#if FEATURE_ARG_SPLIT +//------------------------------------------------------------------------ +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a GT_PUTARG_SPLIT node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained +// +int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) +{ + int srcCount = 0; + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTree* putArgChild = argNode->gtGetOp1(); + + // Registers for split argument corresponds to source + int dstCount = argNode->gtNumRegs; + + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argNode->gtNumRegs; i++) + { + regNumber thisArgReg = (regNumber)((unsigned)argReg + i); + argMask |= genRegMask(thisArgReg); + argNode->SetRegNumByIdx(thisArgReg, i); + } + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. Store to target slot and move to target registers (destination) from source + // + unsigned sourceRegCount = 0; + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. 
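+        // Illustrative example (hypothetical registers): for a GT_PUTARG_SPLIT with
+        // gtNumRegs == 2 whose first register is a6, the first field below is preferenced to a6
+        // and the second to a7; any remaining fields get an empty mask because they are passed
+        // in stack slots rather than registers.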
+ + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + GenTree* node = use.GetNode(); + assert(!node->isContained()); + // The only multi-reg nodes we should see are OperIsMultiRegOp() + assert(!node->IsMultiRegNode()); + + // Consume all the registers, setting the appropriate register mask for the ones that + // go into registers. + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) + { + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + } + sourceRegCount++; + BuildUse(node, sourceMask, 0); + } + srcCount += sourceRegCount; + assert(putArgChild->isContained()); + } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); + + // We can use a ld/st sequence so we need an internal register + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + assert(objChild->isContained()); + } + else + { + srcCount = BuildIndirUses(putArgChild->AsIndir()); + } + assert(putArgChild->isContained()); + } + buildInternalRegisterUses(); + BuildDefs(argNode, dstCount, argMask); + return srcCount; +} +#endif // FEATURE_ARG_SPLIT + +//------------------------------------------------------------------------ +// BuildBlockStore: Build the RefPositions for a block store node. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + GenTree* srcAddrOrFill = nullptr; + + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP srcRegMask = RBM_NONE; + regMaskTP sizeRegMask = RBM_NONE; + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + srcAddrOrFill = src; + + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + { + if (dstAddr->isContained()) + { + // Since the dstAddr is contained the address will be computed in CodeGen. + // This might require an integer register to store the value. + buildInternalIntRegisterDefForNode(blkNode); + } + + const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr(); + + if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES)) + { + // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. + // here just used a temp register. + buildInternalIntRegisterDefForNode(blkNode); + } + } + break; + + case GenTreeBlk::BlkOpKindHelper: + assert(!src->isContained()); + dstAddrRegMask = RBM_ARG_0; + srcRegMask = RBM_ARG_1; + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + else + { + if (src->OperIs(GT_IND)) + { + assert(src->isContained()); + srcAddrOrFill = src->AsIndir()->Addr(); + } + + if (blkNode->OperIs(GT_STORE_OBJ)) + { + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. 
+ // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + + if (size >= 2 * REGSIZE_BYTES) + { + // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance + // so we need to reserve an extra internal register. + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + } + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + } + } + else + { + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + buildInternalIntRegisterDefForNode(blkNode); + break; + + case GenTreeBlk::BlkOpKindHelper: + dstAddrRegMask = RBM_ARG_0; + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_ARG_1; + } + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE)) + { + // Reserve a temp register for the block size argument. + buildInternalIntRegisterDefForNode(blkNode, sizeRegMask); + } + + int useCount = 0; + + if (!dstAddr->isContained()) + { + useCount++; + BuildUse(dstAddr, dstAddrRegMask); + } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } + + if (srcAddrOrFill != nullptr) + { + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } + } + + if (blkNode->OperIs(GT_STORE_DYN_BLK)) + { + useCount++; + BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask); + } + + buildInternalRegisterUses(); + regMaskTP killMask = getKillSetForBlockStore(blkNode); + BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; +} + +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildCast(GenTreeCast* cast) +{ + int srcCount = BuildOperandUses(cast->CastOp()); + BuildDef(cast); + + return srcCount; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 68346bb8866c9..19deba41f7657 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -180,7 +180,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Arm64: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. 
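+            // The same reasoning is assumed to apply to LoongArch64: only overflow-checking
+            // float->integer casts go through the (double-typed) helper, so a TYP_FLOAT source is
+            // widened to double first, while non-overflow conversions are left to the hardware
+            // (see the TARGET_LOONGARCH64 case further down).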
&& tree->gtOverflow() @@ -215,7 +215,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#ifdef TARGET_ARM64 // ARM64 supports all non-overflow checking conversions directly. +// ARM64 and LoongArch64 optimize all non-overflow checking conversions +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return nullptr; #else switch (dstType) @@ -243,7 +244,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) default: unreached(); } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } else { @@ -861,6 +862,11 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); curArgTabEntry->SetByteOffset(0); +#ifdef TARGET_LOONGARCH64 + curArgTabEntry->structFloatFieldType[0] = TYP_UNDEF; + curArgTabEntry->structFloatFieldType[1] = TYP_UNDEF; +#endif + hasRegArgs = true; if (argCount >= argTableSize) { @@ -914,6 +920,34 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned } #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber otherRegNum) +{ + fgArgTabEntry* curArgTabEntry = + AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + assert(curArgTabEntry != nullptr); + + INDEBUG(curArgTabEntry->checkIsStruct();) + assert(numRegs <= 2); + if (numRegs == 2) + { + curArgTabEntry->setRegNum(1, otherRegNum); + } + + return curArgTabEntry; +} +#endif // defined(TARGET_LOONGARCH64) + fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -1785,7 +1819,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) if (varTypeIsStruct(type)) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Can this type be passed as a primitive type? // If so, the following call will return the corresponding primitive type. @@ -1839,7 +1873,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) addrNode = arg; #if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(type)); if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) { @@ -1853,11 +1887,11 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #else // Always create an Obj of the temp to use it as a call argument. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); -#endif // !TARGET_ARM64 +#endif // !(TARGET_ARM64 || TARGET_LOONGARCH64) #endif // FEATURE_MULTIREG_ARGS } -#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) // other targets, we pass the struct by value assert(varTypeIsStruct(type)); @@ -1868,7 +1902,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. 
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); -#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) } // (varTypeIsStruct(type)) @@ -1993,8 +2027,18 @@ void fgArgInfo::EvalArgsToTemps() if (setupArg->OperIsCopyBlkOp()) { setupArg = compiler->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. + if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) +#else if (lclVarType == TYP_STRUCT) +#endif { // This scalar LclVar widening step is only performed for ARM architectures. // @@ -2004,7 +2048,7 @@ void fgArgInfo::EvalArgsToTemps() scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) } // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => @@ -2909,6 +2953,12 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + + assert(!callIsVararg && !isHfaArg); + passUsingFloatRegs = varTypeUsesFloatReg(argx); + DWORD floatFieldFlags = STRUCT_NO_FLOAT_FIELD; + #else #error Unsupported or unset target architecture #endif // TARGET* @@ -2972,14 +3022,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (isStructArg) { if (isHfaArg) @@ -3037,6 +3087,42 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) structPassingKind howToPassStruct; structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); passStructByRef = (howToPassStruct == SPK_ByReference); +#if defined(TARGET_LOONGARCH64) + if (!passStructByRef) + { + assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); + + floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + + passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; + compFloatingPointUsed |= passUsingFloatRegs; + + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) != 0) + { + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". 
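+                    // Illustrative example (hypothetical type): for
+                    //     struct Vec2f { float x; float y; };   // 8 bytes, two float fields
+                    // floatFieldFlags has STRUCT_FLOAT_FIELD_ONLY_TWO set, so the arg must remain
+                    // TYP_STRUCT and be treated as a multi-reg arg passed in two FP registers
+                    // rather than being retyped to a single 8-byte integer.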
+ // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. + structBaseType = TYP_STRUCT; + } + + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) + { + size = 1; + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + size = 2; + } + } + else // if (passStructByRef) + { + size = 1; + byteSize = TARGET_POINTER_SIZE; + } +#else if (howToPassStruct == SPK_ByReference) { byteSize = TARGET_POINTER_SIZE; @@ -3065,6 +3151,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { size = 1; } +#endif } const var_types argType = args->GetNode()->TypeGet(); @@ -3085,6 +3172,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) argAlignBytes = eeGetArgSizeAlignment(argType, isFloatHfa); } +#ifdef TARGET_LOONGARCH64 + regNumber nextOtherRegNum = REG_STK; +#endif // // Figure out if the argument will be passed in a register. // @@ -3179,7 +3269,74 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } -#else // not TARGET_ARM or TARGET_ARM64 + +#elif defined(TARGET_LOONGARCH64) + if (passUsingFloatRegs) + { + // Check if the last register needed is still in the fp argument register range. + passUsingFloatRegs = isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + if (isStructArg) + { + if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && + passUsingFloatRegs) + { + passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + } + + if (!passUsingFloatRegs) + { + size = structSize > 8 ? 2 : 1; + floatFieldFlags = 0; + } + else if (passUsingFloatRegs) + { + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) + { + assert(size == 1); + size = 2; + passUsingFloatRegs = false; + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + assert(size == 1); + size = 2; + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); + } + } + } + + assert(!isHfaArg); // LoongArch64 does not support HFA. + } + + // if we run out of floating-point argument registers, try the int argument registers. + if (!isRegArg) + { + // Check if the last register needed is still in the int argument register range. + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + if (!passUsingFloatRegs && isRegArg && (size > 1)) + { + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + } + + // Did we run out of registers when we had a 16-byte struct (size===2) ? + // (i.e we only have one register remaining but we needed two registers to pass this arg) + // + if (!isRegArg && (size > 1)) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
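+                // Illustrative example (hypothetical layout): a 16-byte struct such as
+                //     struct Pair { long a; long b; };   // size == 2 slots
+                // that arrives when only a7 is still free is split: the first 8 bytes go in a7
+                // and the second 8 bytes go to the stack, which is what REG_STK in
+                // nextOtherRegNum expresses.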
+ nextOtherRegNum = REG_STK; + } + } +#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 #if defined(UNIX_AMD64_ABI) @@ -3331,15 +3488,73 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) - UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); // Set up the next intArgRegNum and fltArgRegNum values. if (!isBackFilled) { +#if defined(TARGET_LOONGARCH64) + // Increment intArgRegNum by 'size' registers + if (!isNonStandard) + { + if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) + { + assert(!passUsingFloatRegs); + assert(size == 2); + intArgRegNum = maxRegArgs; + } + else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) + { + if (passUsingFloatRegs) + { + fltArgRegNum += 1; + } + else + { + intArgRegNum += size; + } + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + structBaseType = structSize == 8 ? TYP_DOUBLE : TYP_FLOAT; + fltArgRegNum += 1; + newArgEntry->structFloatFieldType[0] = structBaseType; + } + else if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) != 0) + { + fltArgRegNum += 1; + intArgRegNum += 1; + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + fltArgRegNum += 2; + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + } +#else + #if defined(UNIX_AMD64_ABI) if (isStructArg) { @@ -3388,6 +3603,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } +#endif // defined(TARGET_LOONGARCH64) } } else // We have an argument that is not passed in a register @@ -3635,9 +3851,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } else // This is passed by value. { +#if defined(TARGET_LOONGARCH64) + // For LoongArch64 the struct {float a; float b;} can be passed by two float registers. + DEBUG_ARG_SLOTS_ASSERT((size == roundupSize / TARGET_POINTER_SIZE) || + ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && + (size == 2) && (size == argEntry->numRegs))); +#else // Check to see if we can transform this into load of a primitive type. 
// 'size' must be the number of pointer sized items DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); +#endif structSize = originalSize; unsigned passingSize = originalSize; @@ -3658,7 +3881,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); } -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can // only transform in that case if the arg is a local. // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 @@ -3668,7 +3891,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (lclVar != nullptr); passingSize = genTypeSize(structBaseType); } -#endif // TARGET_ARM64 || UNIX_AMD64_ABI +#endif // TARGET_ARM64 || UNIX_AMD64_ABI || TARGET_LOONGARCH64 } if (!canTransform) @@ -3706,7 +3929,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((passingSize != structSize) && (lclVar == nullptr)) { copyBlkClass = objClass; @@ -3829,7 +4052,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); } -#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) +#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. @@ -4238,13 +4461,12 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // this also forces the struct to be stack allocated into the local frame. // For the GT_OBJ case will clone the address expression and generate two (or more) // indirections. -// Currently the implementation handles ARM64/ARM and will NYI for other architectures. // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { assert(varTypeIsStruct(arg->TypeGet())); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -4361,19 +4583,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); BYTE gcPtrs[MAX_ARG_REG_COUNT]; - elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
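+        // Illustrative note (restating the case above): such a struct is only 8 bytes, so the
+        // size-based elemCount would be 1, yet the ABI gives it two argument registers (one GPR
+        // for 'a', one FPR for 'b'); forcing elemCount to numRegs makes the field list built
+        // below carry one element per register.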
+ if (fgEntryPtr->numRegs == 2) + { + elemCount = 2; + } +#endif for (unsigned inx = 0; inx < elemCount; inx++) { -#ifdef UNIX_AMD64_ABI +#if defined(UNIX_AMD64_ABI) if (gcPtrs[inx] == TYPE_GC_NONE) { type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], fgEntryPtr->structDesc.eightByteSizes[inx]); } else -#endif // UNIX_AMD64_ABI +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(fgEntryPtr->structFloatFieldType[inx]) || + (genTypeSize(fgEntryPtr->structFloatFieldType[inx]) == 4)) + { + type[inx] = fgEntryPtr->structFloatFieldType[inx]; + } + else +#endif // TARGET_LOONGARCH64 { type[inx] = getJitGCType(gcPtrs[inx]); } @@ -4386,8 +4624,14 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // We can safely widen this to aligned bytes since we are loading from // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and // lives in the stack frame or will be a promoted field. - // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. structSize = elemCount * TARGET_POINTER_SIZE; +#endif } else // we must have a GT_OBJ { @@ -4409,11 +4653,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry case 2: type[lastElem] = TYP_SHORT; break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 4: type[lastElem] = TYP_INT; break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) default: noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); break; @@ -4517,7 +4761,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif // !UNIX_AMD64_ABI -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Is this LclVar a promoted struct with exactly 2 fields? if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa()) { @@ -4695,7 +4939,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); newArg->AddField(this, nextLclFld, offset, type[inx]); - offset += genTypeSize(type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else +#endif + { + offset += genTypeSize(type[inx]); + } } } // Are we passing a GT_OBJ struct? @@ -4745,7 +5000,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry curItem->gtFlags |= GTF_GLOB_REF; newArg->AddField(this, curItem, offset, type[inx]); - offset += genTypeSize(type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. 
`struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else +#endif + { + offset += genTypeSize(type[inx]); + } } } } @@ -12256,8 +12522,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_DIV: +#ifdef TARGET_LOONGARCH64 + case GT_MOD: +#endif if (!varTypeIsFloating(tree->gtType)) { // Codegen for this instruction needs to be able to throw two exceptions: @@ -12266,10 +12535,14 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } break; case GT_UDIV: +#ifdef TARGET_LOONGARCH64 + case GT_UMOD: +#endif // Codegen for this instruction needs to be able to throw one exception: fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); break; -#endif + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_ADD: @@ -17514,7 +17787,7 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) void Compiler::fgResetImplicitByRefRefCount() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17537,7 +17810,7 @@ void Compiler::fgResetImplicitByRefRefCount() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17551,7 +17824,7 @@ void Compiler::fgResetImplicitByRefRefCount() void Compiler::fgRetypeImplicitByRefArgs() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17750,7 +18023,7 @@ void Compiler::fgRetypeImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17763,7 +18036,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() { JITDUMP("\n*************** In fgMarkDemotedImplicitByRefArgs()\n"); -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { @@ -17824,7 +18097,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } /***************************************************************************** @@ -17834,11 +18107,11 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() */ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) { -#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) +#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) return false; -#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 bool changed = false; @@ -17873,7 +18146,7 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) } return changed; -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 
+#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 939ea56badf2c..532fa8fd40976 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -256,6 +256,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + // TODO-LOONGARCH64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog + // can handle non-frame pointer frames. + if (!result) + { + INDEBUG(reason = "Temporary LOONGARCH64 force frame pointer"); + result = true; + } +#endif // TARGET_LOONGARCH64 + #ifdef DEBUG if ((result == true) && (wbReason != nullptr)) { diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index d06bef0cea1d9..971974722eee8 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -103,6 +103,9 @@ REGDEF(STK, 16+XMMBASE, 0x0000, "STK" ) #elif defined(TARGET_ARM64) #include "registerarm64.h" +#elif defined(TARGET_LOONGARCH64) + #include "registerloongarch64.h" + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h new file mode 100644 index 0000000000000..8f3cd157016bb --- /dev/null +++ b/src/coreclr/jit/registerloongarch64.h @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +/*****************************************************************************/ +/*****************************************************************************/ +#ifndef REGDEF +#error Must define REGDEF macro before including this file +#endif +#ifndef REGALIAS +#define REGALIAS(alias, realname) +#endif + +#define RMASK(x) (1ULL << (x)) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(R0, 0, 0x0001, "zero") +REGDEF(RA, 1, 0x0002, "ra" ) +REGDEF(TP, 2, 0x0004, "tp" ) +REGDEF(SP, 3, 0x0008, "sp" ) +REGDEF(A0, 4, 0x0010, "a0" ) +REGDEF(A1, 5, 0x0020, "a1" ) +REGDEF(A2, 6, 0x0040, "a2" ) +REGDEF(A3, 7, 0x0080, "a3" ) +REGDEF(A4, 8, 0x0100, "a4" ) +REGDEF(A5, 9, 0x0200, "a5" ) +REGDEF(A6, 10, 0x0400, "a6" ) +REGDEF(A7, 11, 0x0800, "a7" ) +REGDEF(T0, 12, 0x1000, "t0" ) +REGDEF(T1, 13, 0x2000, "t1" ) +REGDEF(T2, 14, 0x4000, "t2" ) +REGDEF(T3, 15, 0x8000, "t3" ) +REGDEF(T4, 16, 0x10000, "t4" ) +REGDEF(T5, 17, 0x20000, "t5" ) +REGDEF(T6, 18, 0x40000, "t6" ) +REGDEF(T7, 19, 0x80000, "t7" ) +REGDEF(T8, 20, 0x100000, "t8" ) +REGDEF(X0, 21, 0x200000, "x0" ) +REGDEF(FP, 22, 0x400000, "fp" ) +REGDEF(S0, 23, 0x800000, "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" ) + +//NOTE for LoongArch64: +// The `REG_R21` which alias `REG_X0` is specially reserved !!! +// It should be only used with hand written assembly code and should be very careful!!! +// e.g. right now LoongArch64's backend-codegen/emit, there is usually +// a need for an extra register for cases like +// constructing a large imm or offset, saving some intermediate result +// of the overflowing check and integer-comparing result. +// Using the a specially reserved register maybe more efficient. 
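+// Illustrative examples of such uses (an assumption about codegen, not an exhaustive list):
+// materializing a 64-bit immediate with a lu12i.w/ori/lu32i.d/lu52i.d sequence, or holding a
+// stack offset that does not fit the 12-bit immediate field of a load/store instruction.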
+REGALIAS(R21, X0) + +#define FBASE 32 +#define FMASK(x) (1ULL << (FBASE+(x))) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31") + +// The registers with values 64 (NBASE) and above are not real register numbers +#define NBASE 64 + +// This must be last! +REGDEF(STK, 0+NBASE, 0x0000, "STK") + +/*****************************************************************************/ +#undef RMASK +#undef FMASK +#undef FBASE +#undef NBASE +#undef REGDEF +#undef REGALIAS +/*****************************************************************************/ + +// clang-format on diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 58439020fd693..d28a90ec36f5d 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -228,9 +228,9 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r rsMaskResvd = RBM_NONE; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) rsMaskCalleeSaved = RBM_NONE; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_ARM rsMaskPreSpillRegArg = RBM_NONE; diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 34a9bcea64629..9c1a1041eecf8 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,9 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 67bb4523b6f7c..6b2fcd5690eb6 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ 
b/src/coreclr/jit/scopeinfo.cpp @@ -295,7 +295,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies // the IR to comply with the ABI and therefore changes the type of the lclVar @@ -314,7 +314,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( this->vlType = VLT_STK_BYREF; } else -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) { this->vlType = VLT_STK; } @@ -1600,11 +1600,38 @@ void CodeGen::psiBegProlog() if (!isStructHandled) { #ifdef DEBUG +#ifdef TARGET_LOONGARCH64 + var_types regType; + if (varTypeIsStruct(lclVarDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); + if (emitter::isFloatReg(lclVarDsc->GetArgReg())) + { + regType = TYP_DOUBLE; + } + else + { + regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } + } + else + { + regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); + if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + { + // For LoongArch64's ABI, the float args may be passed by integer register. + regType = TYP_LONG; + } + } +#else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (lclVarDsc->lvIsHfaRegArg()) { regType = lclVarDsc->GetHfaType(); } +#endif assert(genMapRegNumToRegArgNum(lclVarDsc->GetArgReg(), regType) != (unsigned)-1); #endif // DEBUG diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b6ab3166e10f8..e5bf31e7f66c2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,8 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || + (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { @@ -51,6 +52,8 @@ inline bool compUnixX86Abi() #define TARGET_READABLE_NAME "ARM" #elif defined(TARGET_ARM64) #define TARGET_READABLE_NAME "ARM64" +#elif defined(TARGET_LOONGARCH64) +#define TARGET_READABLE_NAME "LOONGARCH64" #else #error Unsupported or unset target architecture #endif @@ -70,6 +73,10 @@ inline bool compUnixX86Abi() #define REGMASK_BITS 64 #define CSE_CONST_SHARED_LOW_BITS 12 +#elif defined(TARGET_LOONGARCH64) +#define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 + #else #error Unsupported or unset target architecture #endif @@ -85,7 +92,7 @@ inline bool compUnixX86Abi() // be assigned during register allocation. // REG_NA - Used to indicate that a register is either not yet assigned or not required. // -#if defined(TARGET_ARM) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) enum _regNumber_enum : unsigned { #define REGDEF(name, rnum, mask, sname) REG_##name = rnum, @@ -185,7 +192,7 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. 
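The switch to a 64-bit mask is what lets both LoongArch64 register files share one bitset: 32 integer registers occupy bits 0..31 and 32 floating-point registers occupy bits 32..63 (FBASE is 32 in registerloongarch64.h). A minimal sketch of that layout, using illustrative helper names rather than anything defined by the JIT:

// Sketch only: mirrors the RMASK/FMASK conventions from registerloongarch64.h.
typedef unsigned long long regMaskSketch;

constexpr regMaskSketch GpBit(unsigned rnum)  { return 1ULL << rnum; }        // e.g. A0 (rnum 4) -> 0x10
constexpr regMaskSketch FltBit(unsigned fnum) { return 1ULL << (32 + fnum); } // e.g. F0 -> bit 32

// Both register files can therefore live in one regMaskTP-style value:
constexpr regMaskSketch exampleMask = GpBit(4) | FltBit(0); // "a0" and "f0" together
static_assert(exampleMask == 0x100000010ULL, "a0 is bit 4, f0 is bit 32");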
-#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; @@ -237,6 +244,8 @@ typedef unsigned char regNumberSmall; #include "targetarm.h" #elif defined(TARGET_ARM64) #include "targetarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "targetloongarch64.h" #else #error Unsupported or unset target architecture #endif @@ -536,7 +545,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) assert(genIsValidFloatReg(reg)); assert((unsigned)reg < ArrLen(regMasks)); return regMasks[reg]; diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp new file mode 100644 index 0000000000000..e0097a1b62a1c --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#include "target.h" + +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; + +// clang-format off +const regNumber intArgRegs [] = {REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7}; +const regMaskTP intArgMasks[] = {RBM_A0, RBM_A1, RBM_A2, RBM_A3, RBM_A4, RBM_A5, RBM_A6, RBM_A7}; + +const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7 }; +const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7 }; +// clang-format on + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h new file mode 100644 index 0000000000000..25355994d385b --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.h @@ -0,0 +1,332 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +#if !defined(TARGET_LOONGARCH64) +#error The file should not be included for this platform. +#endif + +// NOTE for LoongArch64: +// The `REG_R21`, which aliases `REG_X0`, is specially reserved !!! +// It can be used only manually and very carefully!!! + +// clang-format off + #define CPU_LOAD_STORE_ARCH 1 + #define CPU_HAS_FP_SUPPORT 1 + #define ROUND_FLOAT 0 // Do not round intermed float expression results + #define CPU_HAS_BYTE_REGS 0 + + #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator loop unroll CpBlk. + #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator loop unroll InitBlk.
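The two unroll limits above bound the block sizes that codegen will expand into straight-line code instead of calling a helper. A hypothetical sketch of that size check (the helper name is invented for illustration; the real decision lives in lowering/codegen):

// Illustrative only; not a JIT API.
bool ShouldUnrollInitBlk(unsigned blockSizeInBytes)
{
    const unsigned initBlkUnrollLimit = 64; // mirrors INITBLK_UNROLL_LIMIT above
    return blockSizeInBytes <= initBlkUnrollLimit;
}

// e.g. a 32-byte zero-init that passes the check could become four 8-byte stores:
//   st.d  zero, dst, 0
//   st.d  zero, dst, 8
//   st.d  zero, dst, 16
//   st.d  zero, dst, 24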
+ +#ifdef FEATURE_SIMD +#pragma error("SIMD Unimplemented yet LOONGARCH") + #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned + #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls +#endif // FEATURE_SIMD + + #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog + #define FEATURE_STRUCTPROMOTE 0 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register + #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register + #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (8 bytes). + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 16 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 2 doubles) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. + #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. + #define MAX_MULTIREG_COUNT 2 // Maximum number of registers defined by a single instruction (including calls). + // This is also the maximum number of registers for a MultiReg node. + + #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers + #define USER_ARGS_COME_LAST 1 + #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. + #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target + #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. + #define FEATURE_EH_FUNCLETS 1 + #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
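The MAX_PASS_*/MAX_ARG_REG_COUNT limits above encode the same size split that the scopeinfo.cpp change earlier relies on: up to 8 bytes fits a single register, up to 16 bytes may span two registers, and larger structs are passed by reference. A rough sketch of that classification, ignoring float fields and register exhaustion (hypothetical helper, not a JIT API):

// Illustrative only; field types and running out of argument registers are ignored here.
unsigned RegsNeededForStructArg(unsigned structSizeInBytes)
{
    if (structSizeInBytes <= 8)  // MAX_PASS_SINGLEREG_BYTES
        return 1;
    if (structSizeInBytes <= 16) // MAX_PASS_MULTIREG_BYTES / MAX_ARG_REG_COUNT
        return 2;
    return 1;                    // larger structs: one register holds a reference (the VLT_STK_BYREF case)
}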
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods + #define CSE_CONSTS 1 // Enable if we want to CSE constants + + #define REG_FP_FIRST REG_F0 + #define REG_FP_LAST REG_F31 + #define FIRST_FP_ARGREG REG_F0 + #define LAST_FP_ARGREG REG_F7 + + #define REGNUM_BITS 6 // number of bits in a REG_* within registerloongarch64.h + #define REGSIZE_BYTES 8 // number of bytes in one general purpose register + #define FP_REGSIZE_BYTES 8 // number of bytes in one FP register + #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP register that are saved/restored. + + #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call. + + #define CODE_ALIGN 4 // code alignment requirement + #define STACK_ALIGN 16 // stack alignment requirement + + #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) + #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) + #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) + #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) + + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + + #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) + #define RBM_ALLDOUBLE RBM_ALLFLOAT + + // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED + #define REG_VAR_ORDER REG_A0,REG_A1,REG_A2,REG_A3,REG_A4,REG_A5,REG_A6,REG_A7, \ + REG_T0,REG_T1,REG_T2,REG_T3,REG_T4,REG_T5,REG_T6,REG_T7,REG_T8, \ + REG_CALLEE_SAVED_ORDER + + #define REG_VAR_ORDER_FLT REG_F12,REG_F13,REG_F14,REG_F15,REG_F16,REG_F17,REG_F18,REG_F19, \ + REG_F2,REG_F3,REG_F4,REG_F5,REG_F6,REG_F7,REG_F8,REG_F9,REG_F10, \ + REG_F20,REG_F21,REG_F22,REG_F23, \ + REG_F24,REG_F25,REG_F26,REG_F27,REG_F28,REG_F29,REG_F30,REG_F31, \ + REG_F1,REG_F0 + + #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 + #define RBM_CALLEE_SAVED_ORDER RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 + + #define CNT_CALLEE_SAVED (10) //s0-s8,fp. + #define CNT_CALLEE_TRASH (17) + #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) + + #define CNT_CALLEE_SAVED_FLOAT (8) + #define CNT_CALLEE_TRASH_FLOAT (24) + + #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES) + #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES) + + #define REG_TMP_0 REG_T0 + + // Temporary registers used for the GS cookie check. + #define REG_GSCOOKIE_TMP_0 REG_T0 + #define REG_GSCOOKIE_TMP_1 REG_T1 + + // register to hold shift amount; no special register is required on LOONGARCH64. 
+ #define REG_SHIFT REG_NA + #define RBM_SHIFT RBM_ALLINT + + // This is a general scratch register that does not conflict with the argument registers + #define REG_SCRATCH REG_T0 + + // This is a float scratch register that does not conflict with the argument registers + #define REG_SCRATCH_FLT REG_F11 + + // This is a general register that can be optionally reserved for other purposes during codegen + #define REG_OPT_RSVD REG_T1 + #define RBM_OPT_RSVD RBM_T1 + + // Where is the exception object on entry to the handler block? + #define REG_EXCEPTION_OBJECT REG_A0 + #define RBM_EXCEPTION_OBJECT RBM_A0 + + #define REG_JUMP_THUNK_PARAM REG_T2 + #define RBM_JUMP_THUNK_PARAM RBM_T2 + + // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // t6: the destination address (LHS of the assignment) + // t7: the object reference (RHS of the assignment) + // On exit: + // t0: trashed + // t1: trashed + // t3: trashed + // t4: trashed + // t6: incremented by 8 + // t7: trashed + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // t8: the source address (points to object reference to write) + // t6: the destination address (object reference written here) + // On exit: + // t8: incremented by 8 + // t6: incremented by 8 + // + + #define REG_WRITE_BARRIER_DST REG_T6 + #define RBM_WRITE_BARRIER_DST RBM_T6 + + #define REG_WRITE_BARRIER_SRC REG_T7 + #define RBM_WRITE_BARRIER_SRC RBM_T7 + + #define REG_WRITE_BARRIER_DST_BYREF REG_T6 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_T6 + + #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 + + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that a0 and a1 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC + + // GenericPInvokeCalliHelper VASigCookie Parameter + #define REG_PINVOKE_COOKIE_PARAM REG_T3 + #define RBM_PINVOKE_COOKIE_PARAM RBM_T3 + + // GenericPInvokeCalliHelper unmanaged target Parameter + #define REG_PINVOKE_TARGET_PARAM REG_T2 + #define RBM_PINVOKE_TARGET_PARAM RBM_T2 + + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) + #define REG_SECRET_STUB_PARAM REG_T2 + #define RBM_SECRET_STUB_PARAM RBM_T2 + + // R2R indirect call. 
Use the same registers as VSD + #define REG_R2R_INDIRECT_PARAM REG_T8 + #define RBM_R2R_INDIRECT_PARAM RBM_T8 + + #define REG_INDIRECT_CALL_TARGET_REG REG_T6 + + // Registers used by PInvoke frame setup + #define REG_PINVOKE_FRAME REG_T0 + #define RBM_PINVOKE_FRAME RBM_T0 + #define REG_PINVOKE_TCB REG_T1 + #define RBM_PINVOKE_TCB RBM_T1 + #define REG_PINVOKE_SCRATCH REG_T1 + #define RBM_PINVOKE_SCRATCH RBM_T1 + + // The following defines are useful for iterating a regNumber + #define REG_FIRST REG_R0 + #define REG_INT_FIRST REG_R0 + #define REG_INT_LAST REG_S8 + #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1) + #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) + #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) + + // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH + + // Which register are int and long values returned in ? + #define REG_INTRET REG_A0 + #define RBM_INTRET RBM_A0 + #define REG_LNGRET REG_A0 + #define RBM_LNGRET RBM_A0 + // second return register for 16-byte structs + #define REG_INTRET_1 REG_A1 + #define RBM_INTRET_1 RBM_A1 + + #define REG_FLOATRET REG_F0 + #define RBM_FLOATRET RBM_F0 + #define RBM_DOUBLERET RBM_F0 + #define REG_FLOATRET_1 REG_F1 + #define RBM_FLOATRET_1 RBM_F1 + #define RBM_DOUBLERET_1 RBM_F1 + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_A0 | RBM_A1 | RBM_A2 | RBM_A3 | RBM_A4 | RBM_A5 | RBM_A6 | RBM_A7 | RBM_T3)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + + #define REG_FPBASE REG_FP + #define RBM_FPBASE RBM_FP + #define STR_FPBASE "fp" + #define REG_SPBASE REG_SP + #define RBM_SPBASE RBM_SP + #define STR_SPBASE "sp" + + #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address + + #define MAX_REG_ARG 8 + #define MAX_FLOAT_REG_ARG 8 + + #define REG_ARG_FIRST REG_A0 + #define REG_ARG_LAST REG_A7 + #define REG_ARG_FP_FIRST REG_F0 + #define REG_ARG_FP_LAST REG_F7 + #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots + + #define REG_ARG_0 REG_A0 + #define REG_ARG_1 REG_A1 + #define REG_ARG_2 REG_A2 + #define REG_ARG_3 REG_A3 + #define REG_ARG_4 REG_A4 + #define REG_ARG_5 REG_A5 + #define REG_ARG_6 REG_A6 + #define REG_ARG_7 REG_A7 + + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + + #define RBM_ARG_0 RBM_A0 + #define RBM_ARG_1 RBM_A1 + #define RBM_ARG_2 RBM_A2 + #define RBM_ARG_3 RBM_A3 + #define RBM_ARG_4 RBM_A4 + #define RBM_ARG_5 RBM_A5 + #define RBM_ARG_6 RBM_A6 + #define RBM_ARG_7 RBM_A7 + + #define REG_FLTARG_0 REG_F0 + #define REG_FLTARG_1 REG_F1 + #define REG_FLTARG_2 REG_F2 + #define REG_FLTARG_3 REG_F3 + #define REG_FLTARG_4 REG_F4 + #define REG_FLTARG_5 REG_F5 + #define REG_FLTARG_6 REG_F6 + #define REG_FLTARG_7 REG_F7 + + #define RBM_FLTARG_0 RBM_F0 + #define RBM_FLTARG_1 RBM_F1 + #define RBM_FLTARG_2 RBM_F2 + #define RBM_FLTARG_3 RBM_F3 + #define RBM_FLTARG_4 RBM_F4 + #define RBM_FLTARG_5 RBM_F5 + #define RBM_FLTARG_6 RBM_F6 + #define RBM_FLTARG_7 RBM_F7 + + #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) + #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) + + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + + #define B_DIST_SMALL_MAX_NEG (-131072) + #define B_DIST_SMALL_MAX_POS (+131071) + + #define OFFSET_DIST_SMALL_MAX_NEG (-2048) + #define OFFSET_DIST_SMALL_MAX_POS (+2047) + + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + +// clang-format on diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 8d5efd0051906..6ad60a064f35c 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,8 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86))) + (TargetOS::IsUnix && + (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); @@ -442,6 +443,10 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) // See unwindX86.cpp +#elif defined(TARGET_LOONGARCH64) + +// See unwindLoongarch64.cpp + #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index c578c30cb78d0..ae9a19a4b37f3 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,7 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an // assert here just to inform us if we increase the size of the prolog @@ -34,7 +34,15 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 100; #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) #define UW_MAX_CODE_WORDS_COUNT 31 #define UW_MAX_EPILOG_START_INDEX 0x3FFU -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) +const unsigned MAX_PROLOG_SIZE_BYTES = 200; +const unsigned MAX_EPILOG_SIZE_BYTES = 200; +#define UWC_END 0xE4 // "end" unwind code +#define UWC_END_C 0xE5 // "end_c" unwind code +#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) +#define UW_MAX_CODE_WORDS_COUNT 31 +#define UW_MAX_EPILOG_START_INDEX 0x3FFU +#endif // TARGET_LOONGARCH64 #define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field // of the .pdata record @@ -129,9 +137,9 @@ class UnwindCodesBase { #if defined(TARGET_ARM) return b >= 0xFD; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef DEBUG @@ -813,7 +821,7 @@ class UnwindInfo : public UnwindBase // Given the first byte of the unwind code, check that its opsize matches // the last instruction added in the emitter. void CheckOpsize(BYTE b1); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void CheckOpsize(BYTE b1) { } // nothing to do; all instructions are 4 bytes @@ -864,4 +872,4 @@ void DumpUnwindInfo(Compiler* comp, #endif // DEBUG -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp new file mode 100644 index 0000000000000..faae126aa5718 --- /dev/null +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -0,0 +1,2290 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
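As a consistency check on the LOONGARCH64 limits just added to unwind.h: every LoongArch64 instruction is 4 bytes, and the "Function Length" header field decoded later in this file is an 18-bit count of 4-byte units, which is exactly the 1 MB fragment limit. A small sketch of that arithmetic (values assumed from the defines above):

// Sketch: the 18-bit Function Length field (in 4-byte units) matches UW_MAX_FRAGMENT_SIZE_BYTES.
constexpr unsigned maxFunctionLengthUnits = 1u << 18;                 // 18-bit field
constexpr unsigned maxFragmentBytes       = maxFunctionLengthUnits * 4;
static_assert(maxFragmentBytes == (1u << 20), "one fragment covers at most 1 MB of code");
// MAX_PROLOG_SIZE_BYTES == 200 likewise corresponds to 50 four-byte instructions.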
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX UnwindInfo XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#if defined(FEATURE_CFI_SUPPORT) +short Compiler::mapRegNumToDwarfReg(regNumber reg) +{ + short dwarfReg = DWARF_REG_ILLEGAL; + + switch (reg) + { + case REG_R0: + dwarfReg = 0; + break; + case REG_RA: + dwarfReg = 1; + break; + case REG_TP: + dwarfReg = 2; + break; + case REG_SP: + dwarfReg = 3; + break; + case REG_A0: + dwarfReg = 4; + break; + case REG_A1: + dwarfReg = 5; + break; + case REG_A2: + dwarfReg = 6; + break; + case REG_A3: + dwarfReg = 7; + break; + case REG_A4: + dwarfReg = 8; + break; + case REG_A5: + dwarfReg = 9; + break; + case REG_A6: + dwarfReg = 10; + break; + case REG_A7: + dwarfReg = 11; + break; + case REG_T0: + dwarfReg = 12; + break; + case REG_T1: + dwarfReg = 13; + break; + case REG_T2: + dwarfReg = 14; + break; + case REG_T3: + dwarfReg = 15; + break; + case REG_T4: + dwarfReg = 16; + break; + case REG_T5: + dwarfReg = 17; + break; + case REG_T6: + dwarfReg = 18; + break; + case REG_T7: + dwarfReg = 19; + break; + case REG_T8: + dwarfReg = 20; + break; + case REG_X0: + dwarfReg = 21; + break; + case REG_FP: + dwarfReg = 22; + break; + case REG_S0: + dwarfReg = 23; + break; + case REG_S1: + dwarfReg = 24; + break; + case REG_S2: + dwarfReg = 25; + break; + case REG_S3: + dwarfReg = 26; + break; + case REG_S4: + dwarfReg = 27; + break; + case REG_S5: + dwarfReg = 28; + break; + case REG_S6: + dwarfReg = 29; + break; + case REG_S7: + dwarfReg = 30; + break; + case REG_S8: + dwarfReg = 31; + break; + case REG_F0: + dwarfReg = 64; + break; + case REG_F1: + dwarfReg = 65; + break; + case REG_F2: + dwarfReg = 66; + break; + case REG_F3: + dwarfReg = 67; + break; + case REG_F4: + dwarfReg = 68; + break; + case REG_F5: + dwarfReg = 69; + break; + case REG_F6: + dwarfReg = 70; + break; + case REG_F7: + dwarfReg = 71; + break; + case REG_F8: + dwarfReg = 72; + break; + case REG_F9: + dwarfReg = 73; + break; + case REG_F10: + dwarfReg = 74; + break; + case REG_F11: + dwarfReg = 75; + break; + case REG_F12: + dwarfReg = 76; + break; + case REG_F13: + dwarfReg = 77; + break; + case REG_F14: + dwarfReg = 78; + break; + case REG_F15: + dwarfReg = 79; + break; + case REG_F16: + dwarfReg = 80; + break; + case REG_F17: + dwarfReg = 81; + break; + case REG_F18: + dwarfReg = 82; + break; + case REG_F19: + dwarfReg = 83; + break; + case REG_F20: + dwarfReg = 84; + break; + case REG_F21: + dwarfReg = 85; + break; + case REG_F22: + dwarfReg = 86; + break; + case REG_F23: + dwarfReg = 87; + break; + case REG_F24: + dwarfReg = 88; + break; + case REG_F25: + dwarfReg = 89; + break; + case REG_F26: + dwarfReg = 90; + break; + case REG_F27: + dwarfReg = 91; + break; + case REG_F28: + dwarfReg = 92; + break; + case REG_F29: + dwarfReg = 93; + break; + case REG_F30: + dwarfReg = 94; + break; + case REG_F31: + dwarfReg = 95; + break; + + default: + NYI("CFI codes"); + } + + return dwarfReg; +} +#endif // FEATURE_CFI_SUPPORT + +void Compiler::unwindPush(regNumber reg) +{ + unreached(); // use one of the unwindSaveReg* functions instead. 
+} + +void Compiler::unwindAllocStack(unsigned size) +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindAllocStackCFI(size); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + assert(size % 16 == 0); + unsigned x = size / 16; + + if (x <= 0x1F) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review: should say size < 512 + + pu->AddCode((BYTE)x); + } + else if (x <= 0x7F) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + + pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x); + } + else + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + // + // For large stack size, the most significant bits + // are stored first (and next to the opCode) per the unwind spec. + + pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindSetFrameRegCFI(reg, offset); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (offset == 0) + { + assert(reg == REG_FP); + + // set_fp: 11100001 : set up fp : with : move fp, sp + pu->AddCode(0xE1); + } + else + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + + assert(reg == REG_FP); + assert((offset % 8) == 0); + + unsigned x = offset / 8; + assert(x <= 0x1FF); + + pu->AddCode(0xE2, (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSaveReg(regNumber reg, unsigned offset) +{ + unwindSaveReg(reg, (int)offset); +} + +void Compiler::unwindNop() +{ + UnwindInfo* pu = &funCurrentFunc()->uwi; + +#ifdef DEBUG + if (verbose) + { + printf("unwindNop: adding NOP\n"); + } +#endif + + INDEBUG(pu->uwiAddingNOP = true); + + // nop: 11100011: no unwind operation is required. + pu->AddCode(0xE3); + + INDEBUG(pu->uwiAddingNOP = false); +} + +void Compiler::unwindSaveReg(regNumber reg, int offset) +{ + + // st.d reg, sp, offset + + // offset for store in prolog must be positive and a multiple of 8. 
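Worked examples of the stack-allocation and frame-pointer codes emitted by the helpers above (the sizes are hypothetical; the bytes follow directly from the arithmetic in unwindAllocStack and unwindSetFrameReg):

// unwindAllocStack(256)   : x = 256/16   = 0x10   -> alloc_s : 10
// unwindAllocStack(1024)  : x = 1024/16  = 0x40   -> alloc_m : C0 40
// unwindAllocStack(65536) : x = 65536/16 = 0x1000 -> alloc_l : E0 00 10 00
// unwindSetFrameReg(REG_FP, 0)  :                 -> set_fp  : E1
// unwindSetFrameReg(REG_FP, 64) : x = 64/8 = 8    -> add_fp  : E2 00 08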
+ assert(0 <= offset && offset <= 2047); + assert((offset % 8) == 0); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + int z = offset / 8; + // assert(0 <= z && z <= 0xFF); + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (emitter::isGeneralRegister(reg)) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + + BYTE x = (BYTE)(reg - REG_RA); + assert(0 <= x && x <= 0x1E); + + pu->AddCode(0xD0, (BYTE)x, (BYTE)z); + } + else + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(REG_F24 <= reg && // first legal register: F24 + reg <= REG_F31); // last legal register: F31 + + BYTE x = (BYTE)(reg - REG_F24); + assert(0 <= x && x <= 0x7); + + pu->AddCode(0xDC, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } +} + +void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) +{ + assert(!"unused on LOONGARCH64 yet"); +} + +void Compiler::unwindReturn(regNumber reg) +{ + // Nothing to do; we will always have at least one trailing "end" opcode in our padding. +} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Debug helpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes + +unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) +{ + static BYTE s_UnwindSize[256] = { + // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + }; + + unsigned size = s_UnwindSize[b1]; + assert(1 <= size && size <= 4); + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Support Classes XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindCodesBase +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG + +// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes. +unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog) +{ + BYTE* pCodesStart = GetCodes(); + BYTE* pCodes = pCodesStart; + unsigned size = 0; + for (;;) + { + BYTE b1 = *pCodes; + if (IsEndCode(b1)) + { + break; // We hit an "end" code; we're done + } + size += 4; // All codes represent 4 byte instructions. + pCodes += GetUnwindSizeFromUnwindHeader(b1); + assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed + } + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debug dumpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// start is 0-based index from LSB, length is number of bits +DWORD ExtractBits(DWORD dw, DWORD start, DWORD length) +{ + return (dw >> start) & ((1 << length) - 1); +} + +// Dump the unwind data. +// Arguments: +// isHotCode: true if this unwind data is for the hot section +// startOffset: byte offset of the code start that this unwind data represents +// endOffset: byte offset of the code end that this unwind data represents +// pHeader: pointer to the unwind data blob +// unwindBlockSize: size in bytes of the unwind data blob + +void DumpUnwindInfo(Compiler* comp, + bool isHotCode, + UNATIVE_OFFSET startOffset, + UNATIVE_OFFSET endOffset, + const BYTE* const pHeader, + ULONG unwindBlockSize) +{ + printf("Unwind Info%s:\n", isHotCode ? "" : " COLD"); + + // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end + // to provide padding, and round down to get a multiple of 4 bytes in size. + DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader; + DWORD dw; + + dw = *pdw++; + + DWORD codeWords = ExtractBits(dw, 27, 5); + DWORD epilogCount = ExtractBits(dw, 22, 5); + DWORD EBit = ExtractBits(dw, 21, 1); + DWORD XBit = ExtractBits(dw, 20, 1); + DWORD Vers = ExtractBits(dw, 18, 2); + DWORD functionLength = ExtractBits(dw, 0, 18); + + printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset)); + printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset)); + printf(" Code Words : %u\n", codeWords); + printf(" Epilog Count : %u\n", epilogCount); + printf(" E bit : %u\n", EBit); + printf(" X bit : %u\n", XBit); + printf(" Vers : %u\n", Vers); + printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength, + functionLength * 4, functionLength * 4); + + assert(functionLength * 4 == endOffset - startOffset); + + if (codeWords == 0 && epilogCount == 0) + { + // We have an extension word specifying a larger number of Code Words or Epilog Counts + // than can be specified in the header word. 
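To make the header-word decoding above concrete, here is a hypothetical packing helper and one worked value (E, X, and Vers are left zero; PackUnwindHeader is not part of this file):

// Sketch: the inverse of the ExtractBits calls above for the main header word.
DWORD PackUnwindHeader(DWORD functionLengthIn4ByteUnits, DWORD epilogCount, DWORD codeWords)
{
    return (codeWords << 27) | (epilogCount << 22) | (functionLengthIn4ByteUnits & 0x3FFFF);
}

// e.g. a 1024-byte function (256 four-byte units) with 1 epilog scope and 2 code words:
//   PackUnwindHeader(256, 1, 2) == 0x10400100
//   ExtractBits(0x10400100, 27, 5) == 2    (Code Words)
//   ExtractBits(0x10400100, 22, 5) == 1    (Epilog Count)
//   ExtractBits(0x10400100,  0, 18) == 256 (Function Length)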
+ + dw = *pdw++; + + codeWords = ExtractBits(dw, 16, 8); + epilogCount = ExtractBits(dw, 0, 16); + assert((dw & 0xF0000000) == 0); // reserved field should be zero + + printf(" ---- Extension word ----\n"); + printf(" Extended Code Words : %u\n", codeWords); + printf(" Extended Epilog Count : %u\n", epilogCount); + } + + bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false + + if (EBit == 0) + { + // We have an array of epilog scopes + + printf(" ---- Epilog scopes ----\n"); + if (epilogCount == 0) + { + printf(" No epilogs\n"); + } + else + { + for (DWORD scope = 0; scope < epilogCount; scope++) + { + dw = *pdw++; + + DWORD epilogStartOffset = ExtractBits(dw, 0, 18); + DWORD res = ExtractBits(dw, 18, 4); + DWORD epilogStartIndex = ExtractBits(dw, 22, 10); + + // Note that epilogStartOffset for a funclet is the offset from the beginning + // of the current funclet, not the offset from the beginning of the main function. + // To help find it when looking through JitDump output, also show the offset from + // the beginning of the main function. + DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset; + + assert(res == 0); + + printf(" ---- Scope %d\n", scope); + printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main " + "function begin = %u (0x%06x)\n", + comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset), + comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin)); + printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex); + + epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes + } + } + } + else + { + printf(" --- One epilog, unwind codes at %u\n", epilogCount); + assert(epilogCount < ArrLen(epilogStartAt)); + epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset + } + + // Dump the unwind codes + + printf(" ---- Unwind codes ----\n"); + + DWORD countOfUnwindCodes = codeWords * 4; + PBYTE pUnwindCode = (PBYTE)pdw; + BYTE b1, b2, b3, b4; + DWORD x, z; + for (DWORD i = 0; i < countOfUnwindCodes; i++) + { + // Does this byte start an epilog sequence? If so, note that fact. 
+ if (epilogStartAt[i]) + { + printf(" ---- Epilog start at index %u ----\n", i); + } + + b1 = *pUnwindCode++; + + if ((b1 & 0xE0) == 0) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review:should say size < 512 + x = b1 & 0x1F; + printf(" %02X alloc_s #%u (0x%02X); addi.d sp, sp, -%u (0x%03X)\n", b1, x, x, x * 16, x * 16); + } +#if 0 + else if ((b1 & 0xE0) == 0x20) + { + // save_s0s1_x: 001zzzzz: save pair at [sp-#Z*8]!, pre-indexed offset >= -248 + z = b1 & 0x1F; + printf(" %02X save_s0s1_x #%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, z, z, + getRegName(REG_S0), getRegName(REG_S1), z * 8); + } + else if ((b1 & 0xF0) == 0x40) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra #%u (0x%03X); Two sd %s, %s, [sp, #%u]\n", b1, b2, z, z, getRegName(REG_FP), + getRegName(REG_RA), z * 8); + } + else if ((b1 & 0xF0) == 0x80) + { + // save_fpra_x: 1000zzzz | zzzzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra_x #%u (0x%03X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, z, z, + getRegName(REG_FP), getRegName(REG_RA), (z + 1) * 8); + } +#endif + else if ((b1 & 0xF8) == 0xC0) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2; + + printf(" %02X %02X alloc_m #%u (0x%03X); addi.d sp, sp, -%u (0x%04X)\n", b1, b2, x, x, x * 16, + x * 16); + } + else if (b1 == 0xD0) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)b2; + z = (DWORD)b3; + + printf(" %02X %02X %02X save_reg X#%u Z#%u (0x%02X); st.d %s, sp, %u\n", b1, b2, b3, x, z, z, + getRegName(REG_RA + x), z * 8); + } +#if 0 + else if (b1 == 0xC8) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), z * 8); + } + else if (b1 == 0xCC) + { + // save_regp_x: 11001100 | 0xxxzzzz | zzzzzzzz: save pair s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i+= 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp_x X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), (z + 1) * 8); + } + else if ((b1 & 0xFE) == 0xD4) + { + // save_reg_x: 1101010x | xxxzzzzz: save reg s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); sd %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_S0 + x), (z + 
1) * 8); + } + else if (b1 == 0xD6) + { + // save_rapair: 11010110 | 0xxxzzzz | zzzzzzzz: save pair at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_lrpair X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_RA), z * 8); + } + else if (b1 == 0xD8) + { + // save_fregp: 11011000 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X) at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), z * 8); + } + else if (b1 == 0xDA) + { + // save_fregp_x: 11011010 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp_x X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xDC) + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_freg X#%u Z#%u (0x%02X); fst.d %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x), z * 8); + } +#if 0 + else if (b1 == 0xDE) + { + // save_freg_x: 11011110 | xxxzzzzz : save reg f(24 + #X) at [sp - (#Z + 1) * 8]!, pre - indexed offset >= + // -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); sdc1 %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_F24 + x, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xE0) + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + assert(i + 3 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + b4 = *pUnwindCode++; + i += 3; + + x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4; + + printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); addi.d sp, sp, -%u (%06X)\n", b1, b2, b3, b4, x, x, + x * 16, x * 16); + } + else if (b1 == 0xE1) + { + // set_fp: 11100001 : set up $29 : with : move fp, sp + + printf(" %02X set_fp; move %s, sp\n", b1, getRegName(REG_FP)); + } + else if (b1 == 0xE2) + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + assert(i + 2 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, + getRegName(REG_FP), x * 8); + } + else if (b1 == 0xE3) + { + // nop: 11100011: no unwind operation is required. 
+ + printf(" %02X nop\n", b1); + } + else if (b1 == 0xE4) + { + // end: 11100100 : end of unwind code + + printf(" %02X end\n", b1); + } + else if (b1 == 0xE5) + { + // end_c: 11100101 : end of unwind code in current chained scope. + + printf(" %02X end_c\n", b1); + } + else if (b1 == 0xE6) + { + // save_next: 11100110 : save next non - volatile Int or FP register pair. + + printf(" %02X save_next\n", b1); + } + else + { + printf("===========[loongarch64] Unknown / reserved unwind code: %02X\n", b1); + // Unknown / reserved unwind code + assert(!"Internal error decoding unwind codes"); + } + } + + pdw += codeWords; + assert((PBYTE)pdw == pUnwindCode); + assert((PBYTE)pdw == pHeader + unwindBlockSize); + + assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA + + printf("\n"); +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind APIs XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +void Compiler::unwindBegProlog() +{ + assert(compGeneratingProlog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindBegPrologCFI(); + return; + } +#endif // FEATURE_CFI_SUPPORT + + FuncInfoDsc* func = funCurrentFunc(); + + // There is only one prolog for a function/funclet, and it comes first. So now is + // a good time to initialize all the unwind data structures. + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, true, &startLoc, &endLoc); + + func->uwi.InitUnwindInfo(this, startLoc, endLoc); + func->uwi.CaptureLocation(); + + func->uwiCold = NULL; // No cold data yet +} + +void Compiler::unwindEndProlog() +{ + assert(compGeneratingProlog); +} + +void Compiler::unwindBegEpilog() +{ + assert(compGeneratingEpilog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + funCurrentFunc()->uwi.AddEpilog(); +} + +void Compiler::unwindEndEpilog() +{ + assert(compGeneratingEpilog); +} + +// The instructions between the last captured "current state" and the current instruction +// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes +// for them. +void Compiler::unwindPadding() +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); +} + +// Ask the VM to reserve space for the unwind information for the function and +// all its funclets. +void Compiler::unwindReserve() +{ + assert(!compGeneratingProlog); + assert(!compGeneratingEpilog); + + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindReserveFunc(funGetFunc(funcIdx)); + } +} + +void Compiler::unwindReserveFunc(FuncInfoDsc* func) +{ + BOOL isFunclet = (func->funKind == FUNC_ROOT) ? 
FALSE : TRUE; + bool funcHasColdSection = false; + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + DWORD unwindCodeBytes = 0; + if (fgFirstColdBlock != nullptr) + { + eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); + } + unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE)); + eeReserveUnwindInfo(isFunclet, false /*isColdCode*/, unwindCodeBytes); + + return; + } +#endif // FEATURE_CFI_SUPPORT + + // If there is cold code, split the unwind data between the hot section and the + // cold section. This needs to be done before we split into fragments, as each + // of the hot and cold sections can have multiple fragments. + + if (fgFirstColdBlock != NULL) + { + assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, false, &startLoc, &endLoc); + + func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); + func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); + func->uwiCold->HotColdSplitCodes(&func->uwi); + + funcHasColdSection = true; + } + + // First we need to split the function or funclet into fragments that are no larger + // than 512K, so the fragment size will fit in the unwind data "Function Length" field. + // The LOONGARCH Exception Data specification "Function Fragments" section describes this. + func->uwi.Split(); + + func->uwi.Reserve(isFunclet, true); + + // After the hot section, split and reserve the cold section + + if (funcHasColdSection) + { + assert(func->uwiCold != NULL); + + func->uwiCold->Split(); + func->uwiCold->Reserve(isFunclet, false); + } +} + +// unwindEmit: Report all the unwind information to the VM. +// Arguments: +// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code +// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code. + +void Compiler::unwindEmit(void* pHotCode, void* pColdCode) +{ + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode); + } +} + +void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode) +{ + // Verify that the JIT enum is in sync with the JIT-EE interface enum + static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT); + static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); + static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindEmitFuncCFI(func, pHotCode, pColdCode); + return; + } +#endif // FEATURE_CFI_SUPPORT + + func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); + + if (func->uwiCold != NULL) + { + func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindPrologCodes +// +/////////////////////////////////////////////////////////////////////////////// + +// We're going to use the prolog codes memory to store the final unwind data. +// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then +// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes. +// Set the header pointer for future use, adding the header bytes (this pointer +// is updated when a header byte is added), and remember the index that points +// to the beginning of the header. 
+ +void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes) +{ +#ifdef DEBUG + // We're done adding codes. Check that we didn't accidentally create a bigger prolog. + unsigned codeSize = GetCodeSizeFromUnwindCodes(true); + assert(codeSize <= MAX_PROLOG_SIZE_BYTES); +#endif // DEBUG + + int prologBytes = Size(); + + EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment + + upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header + + assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3); + + upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing + assert(upcHeaderSlot >= -1); + + if (epilogBytes > 0) + { + // The prolog codes that are already at the end of the array need to get moved to the middle, + // with space for the non-matching epilog codes to follow. + + memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), + &upcMem[upcCodeSlot], prologBytes); + + // Note that the three UWC_END padding bytes still exist at the end of the array. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes. + memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes); +#endif // DEBUG + + upcEpilogSlot = + upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill + + // Update upcCodeSlot to point at the new beginning of the prolog codes + upcCodeSlot = upcUnwindBlockSlot + headerBytes; + } +} + +// Add a header word. Header words are added starting at the beginning, in order: first to last. +// This is in contrast to the prolog unwind codes, which are added in reverse order. +void UnwindPrologCodes::AddHeaderWord(DWORD d) +{ + assert(-1 <= upcHeaderSlot); + assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there! + + // Store it byte-by-byte in little-endian format. We've already ensured there is enough space + // in SetFinalSize(). + upcMem[++upcHeaderSlot] = (BYTE)d; + upcMem[++upcHeaderSlot] = (BYTE)(d >> 8); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 16); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 24); +} + +// AppendEpilog: copy the epilog bytes to the next epilog bytes slot +void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi) +{ + assert(upcEpilogSlot != -1); + + int epiSize = pEpi->Size(); + memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(), + epiSize); // -3 to avoid writing to the alignment padding + assert(pEpi->GetStartIndex() == + upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it. + + upcEpilogSlot += epiSize; + assert(upcEpilogSlot <= upcMemSize - 3); +} + +// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes +void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize) +{ + assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data! + + *ppUnwindBlock = &upcMem[upcUnwindBlockSlot]; + + // We put 4 'end' codes at the end for padding, so we can ensure we have an + // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end' + // codes (leave one), and then align the size up to a multiple of 4. 
+ *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD)); +} + +int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) +{ + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = 0; // Size() - pEpi->Size(); + + BYTE* pProlog = GetCodes(); + BYTE* pEpilog = pEpi->GetCodes(); + + // First check set_fp. + if (0 < pEpi->Size()) + { + if (*pProlog == 0xE1) + { + pProlog++; + if (*pEpilog == 0xE1) + { + pEpilog++; + } + else + { + matchIndex = 1; + } + } + else if (*pProlog == 0xE2) + { + pProlog += 3; + if (*pEpilog == 0xE1) + { + pEpilog += 3; + } + else + { + matchIndex = 3; + } + } + } + + if (0 == memcmp(pProlog, pEpilog, pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +// Copy the prolog codes from another prolog. The only time this is legal is +// if we are at the initial state and no prolog codes have been added. +// This is used to create the 'phantom' prolog for non-first fragments. + +void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) +{ + assert(uwiComp == pCopyFrom->uwiComp); + assert(upcMem == upcMemLocal); + assert(upcMemSize == UPC_LOCAL_COUNT); + assert(upcHeaderSlot == -1); + assert(upcEpilogSlot == -1); + + // Copy the codes + EnsureSize(pCopyFrom->upcMemSize); + assert(upcMemSize == pCopyFrom->upcMemSize); + memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize); + + // Copy the other data + upcCodeSlot = pCopyFrom->upcCodeSlot; + upcHeaderSlot = pCopyFrom->upcHeaderSlot; + upcEpilogSlot = pCopyFrom->upcEpilogSlot; + upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot; +} + +void UnwindPrologCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > upcMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem, + upcMemSize); // copy the existing data to the end +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(upcMem, 0xFF, upcMemSize); +#endif // DEBUG + upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + upcCodeSlot += newSize - upcMemSize; + upcMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindPrologCodes::Dump(int indent) +{ + printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0])); + printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem)); + printf("%*s upcMemSize: %d\n", indent, "", upcMemSize); + printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot); + printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot); + printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot); + printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot); + + if (upcMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < upcMemSize; i++) + { + printf(" %02x", upcMem[i]); + if (i == upcCodeSlot) + printf(" <-C"); + else if (i == upcHeaderSlot) + printf(" <-H"); + else if (i == upcEpilogSlot) + printf(" <-E"); + else if (i == upcUnwindBlockSlot) + printf(" <-U"); + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogCodes +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindEpilogCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > uecMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize); +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(uecMem, 0xFF, uecMemSize); +#endif // DEBUG + uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + // uecCodeSlot stays the same + uecMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindEpilogCodes::Dump(int indent) +{ + printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0])); + printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem)); + printf("%*s uecMemSize: %d\n", indent, "", uecMemSize); + printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot); + printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized)); + + if (uecMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < uecMemSize; i++) + { + printf(" %02x", uecMem[i]); + if (i == uecCodeSlot) + printf(" <-C"); // Indicate the current pointer + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogInfo +// +/////////////////////////////////////////////////////////////////////////////// + +// Do the current unwind codes match those of the argument epilog? +// If they don't match, return -1. If they do, return the offset into +// our codes at which the argument codes match. Note that this means that +// the argument codes can match a subset of our codes. The subset needs to be at +// the end, for the "end" code to match. +// +// Note that if we wanted to handle 0xFD and 0xFE codes, by converting +// an existing 0xFF code to one of those, we might do that here. 
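Both EnsureSize variants above grow their buffer to the next power of two; the only difference is where the surviving bytes land, because prolog codes are filled from the back of the buffer toward the front, while epilog codes are filled front-to-back. A minimal standalone sketch of that growth step (illustrative names, not the JIT's allocator):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Grow a buffer of 'oldSize' bytes to the next power of two that holds 'requiredSize' bytes.
// Prolog-style buffers copy the old contents to the *tail* of the new buffer (codes are
// built back-to-front); epilog-style buffers copy them to the head.
static unsigned char* Grow(const unsigned char* oldBuf, int oldSize, int requiredSize, bool backToFront, int* newSize)
{
    int size;
    for (size = oldSize << 1; size < requiredSize; size <<= 1)
    {
        // same doubling loop shape as EnsureSize
    }

    unsigned char* newBuf = (unsigned char*)calloc((size_t)size, 1);
    memcpy(backToFront ? (newBuf + size - oldSize) : newBuf, oldBuf, (size_t)oldSize);

    *newSize = size;
    return newBuf;
}

int main()
{
    const unsigned char codes[4] = {0xAA, 0xBB, 0xCC, 0xDD};
    int                 size;

    unsigned char* prologStyle = Grow(codes, 4, 10, /* backToFront */ true, &size);
    printf("prolog-style growth to %d bytes; old data now ends at offset %d\n", size, size - 1);

    unsigned char* epilogStyle = Grow(codes, 4, 10, /* backToFront */ false, &size);
    printf("epilog-style growth to %d bytes; old data still starts at offset 0 (first byte 0x%02x)\n", size,
           (unsigned)epilogStyle[0]);

    free(prologStyle);
    free(epilogStyle);
    return 0;
}
```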
+ +int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi) +{ + if (Matches()) + { + // We are already matched to someone else, and won't provide codes to the final layout + return -1; + } + + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = Size() - pEpi->Size(); + + if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +void UnwindEpilogInfo::CaptureEmitLocation() +{ + noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog + epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation(); + epiEmitLocation->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindEpilogInfo::FinalizeOffset() +{ + epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->GetEmitter()); +} + +#ifdef DEBUG +void UnwindEpilogInfo::Dump(int indent) +{ + printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext)); + printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation)); + printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset); + printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches)); + printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex); + + epiCodes.Dump(indent + 2); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindFragmentInfo +// +/////////////////////////////////////////////////////////////////////////////// + +UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog) + : UnwindBase(comp) + , ufiNext(NULL) + , ufiEmitLoc(emitLoc) + , ufiHasPhantomProlog(hasPhantomProlog) + , ufiPrologCodes(comp) + , ufiEpilogFirst(comp) + , ufiEpilogList(NULL) + , ufiEpilogLast(NULL) + , ufiCurCodes(&ufiPrologCodes) + , ufiSize(0) + , ufiStartOffset(UFI_ILLEGAL_OFFSET) +{ +#ifdef DEBUG + ufiNum = 1; + ufiInProlog = true; + ufiInitialized = UFI_INITIALIZED_PATTERN; +#endif // DEBUG +} + +void UnwindFragmentInfo::FinalizeOffset() +{ + if (ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + ufiStartOffset = 0; + } + else + { + ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); + } +} + +void UnwindFragmentInfo::AddEpilog() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (ufiInProlog) + { + assert(ufiEpilogList == NULL); + ufiInProlog = false; + } + else + { + assert(ufiEpilogList != NULL); + } +#endif // DEBUG + + // Either allocate a new epilog object, or, for the first one, use the + // preallocated one that is a member of the UnwindFragmentInfo class. + + UnwindEpilogInfo* newepi; + + if (ufiEpilogList == NULL) + { + // Use the epilog that's in the class already. Be sure to initialize it! + newepi = ufiEpilogList = &ufiEpilogFirst; + } + else + { + newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp); + } + + // Put the new epilog at the end of the epilog list + + if (ufiEpilogLast != NULL) + { + ufiEpilogLast->epiNext = newepi; + } + + ufiEpilogLast = newepi; + + // What is the starting code offset of the epilog? Store an emitter location + // so we can ask the emitter later, after codegen. 
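The epilog-to-epilog comparison implemented in UnwindEpilogInfo::Match above is a plain suffix match: the candidate's codes must equal the tail of this epilog's codes, so that the trailing "end" code lines up. A standalone sketch of that check (illustrative byte values, no JIT types):

```cpp
#include <cstdio>
#include <cstring>

// Return the offset at which 'needle' matches the *tail* of 'haystack', or -1 if it does not.
// This mirrors the memcmp-based check in UnwindEpilogInfo::Match.
static int SuffixMatch(const unsigned char* haystack, int haystackLen, const unsigned char* needle, int needleLen)
{
    if (haystackLen < needleLen)
    {
        return -1;
    }

    int matchIndex = haystackLen - needleLen;
    if (memcmp(haystack + matchIndex, needle, (size_t)needleLen) == 0)
    {
        return matchIndex;
    }
    return -1;
}

int main()
{
    // Made-up code sequences; 0xE4 stands in for the terminating "end" code here.
    const unsigned char longerEpilog[]  = {0x42, 0x83, 0x01, 0xE4};
    const unsigned char shorterEpilog[] = {0x83, 0x01, 0xE4};
    const unsigned char otherEpilog[]   = {0x90, 0x01, 0xE4};

    printf("shorter vs longer: match index %d\n", SuffixMatch(longerEpilog, 4, shorterEpilog, 3)); // prints 1
    printf("other   vs longer: match index %d\n", SuffixMatch(longerEpilog, 4, otherEpilog, 3));   // prints -1
    return 0;
}
```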
+ + newepi->CaptureEmitLocation(); + + // Put subsequent unwind codes in this new epilog + + ufiCurCodes = &newepi->epiCodes; +} + +// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will +// become 'phantom' prolog codes in this fragment. Note that this fragment should +// not have any prolog codes currently; it is at the initial state. + +void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) +{ + ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes); + ufiPrologCodes.AddCode(UWC_END_C); +} + +// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent +// epilogs that start at or after the location represented by 'emitLoc' are removed +// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have +// any epilog codes currently; it is at the initial state. + +void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) +{ + UnwindEpilogInfo* pEpiPrev; + UnwindEpilogInfo* pEpi; + + UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->GetEmitter()); + + for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it + if (pEpi->GetStartOffset() >= splitOffset) + { + // This epilog and all following epilogs, which must be in order of increasing offsets, + // get moved to this fragment. + + // Splice in the epilogs to this fragment. Set the head of the epilog + // list to this epilog. + ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst' + ufiEpilogLast = pSplitFrom->ufiEpilogLast; + + // Splice out the tail of the list from the 'pSplitFrom' epilog list + pSplitFrom->ufiEpilogLast = pEpiPrev; + if (pSplitFrom->ufiEpilogLast == NULL) + { + pSplitFrom->ufiEpilogList = NULL; + } + else + { + pSplitFrom->ufiEpilogLast->epiNext = NULL; + } + + // No more codes should be added once we start splitting + pSplitFrom->ufiCurCodes = NULL; + ufiCurCodes = NULL; + + break; + } + } +} + +// Is this epilog at the end of an unwind fragment? Ask the emitter. +// Note that we need to know this before all code offsets are finalized, +// so we can determine whether we can omit an epilog scope word for a +// single matching epilog. + +bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi) +{ + return uwiComp->GetEmitter()->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc); +} + +// Merge the unwind codes as much as possible. +// This function is called before all offsets are final. +// Also, compute the size of the final unwind block. Store this +// and some other data for later, when we actually emit the +// unwind block. + +void UnwindFragmentInfo::MergeCodes() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + + unsigned epilogCount = 0; + unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the + // prolog codes + unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes + UnwindEpilogInfo* pEpi; + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++epilogCount; + + pEpi->FinalizeCodes(); + + // Does this epilog match the prolog? + // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal + // prolog and epilog codes. 
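SplitEpilogCodes above is essentially a splice of a singly linked list that is sorted by code offset: everything from the first epilog at or after the split offset moves to the new fragment. A self-contained sketch of that splice (illustrative types, not the JIT's):

```cpp
#include <cstdio>

struct Epilog
{
    unsigned offset;
    Epilog*  next;
};

// Move every node with 'offset >= splitOffset' from the list headed by '*srcHead' to a new
// list, returning the new list's head. Assumes the list is sorted by increasing offset.
static Epilog* SpliceAtOffset(Epilog** srcHead, unsigned splitOffset)
{
    Epilog* prev = nullptr;
    for (Epilog* cur = *srcHead; cur != nullptr; prev = cur, cur = cur->next)
    {
        if (cur->offset >= splitOffset)
        {
            if (prev == nullptr)
            {
                *srcHead = nullptr; // the whole list moves
            }
            else
            {
                prev->next = nullptr; // cut the source list just before 'cur'
            }
            return cur;
        }
    }
    return nullptr; // nothing at or after the split point
}

int main()
{
    Epilog e3 = {0x300, nullptr};
    Epilog e2 = {0x200, &e3};
    Epilog e1 = {0x100, &e2};

    Epilog* hotList  = &e1;
    Epilog* coldList = SpliceAtOffset(&hotList, 0x200);

    printf("hot list tail offset:  0x%x\n", hotList->offset);  // 0x100
    printf("cold list head offset: 0x%x\n", coldList->offset); // 0x200
    return 0;
}
```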
+ + int matchIndex; + + matchIndex = ufiPrologCodes.Match(pEpi); + if (matchIndex != -1) + { + pEpi->SetMatches(); + pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index + } + else + { + // The epilog codes don't match the prolog codes. Do they match any of the epilogs + // we've seen so far? + + bool matched = false; + for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext) + { + matchIndex = pEpi2->Match(pEpi); + if (matchIndex != -1) + { + // Use the same epilog index as the one we matched, as it has already been set. + pEpi->SetMatches(); + pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's + // codes, in which case matchIndex > 0 + matched = true; + break; + } + } + + if (!matched) + { + pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location + epilogCodeBytes += pEpi->Size(); + epilogIndex += pEpi->Size(); + } + } + } + + DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes; + codeBytes = AlignUp(codeBytes, sizeof(DWORD)); + + DWORD codeWords = + codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block + + // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"? + + bool needExtendedCodeWordsEpilogCount = + (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT); + + // How many epilog scope words do we need? + + bool setEBit = false; // do we need to set the E bit? + unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs! + + if (epilogCount == 1) + { + assert(ufiEpilogList != NULL); + assert(ufiEpilogList->epiNext == NULL); + + if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog + !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList)) + { + epilogScopes = 0; // Don't need any epilog scope words + setEBit = true; + } + } + + DWORD headerBytes = (1 // Always need first header DWORD + + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code + // Words or Extended Epilog Count? + + epilogScopes // One DWORD per epilog scope, for EBit = 0 + ) * + sizeof(DWORD); // convert it to bytes + + DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, + // including end padding if necessary + + // Construct the final unwind information. + + // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs + // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match + // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the + // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog + // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we + // have that much memory, and then copy the prolog codes to the right place, appending the non-matching + // epilog codes and prepending the header. 
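To make the size arithmetic above concrete: suppose the prolog needs 10 code bytes, one epilog matches the prolog, and one 6-byte epilog does not, giving 2 epilog scopes. A quick sketch using the same formulas as MergeCodes (the UW_MAX_* limits here are stand-in values inferred from the 5-bit header fields; the real ones come from the target's unwind headers):

```cpp
#include <cstdio>

// Stand-in limits: the first header DWORD has 5 bits each for "Epilog Count" and "Code Words"
// (see the << 22 and << 27 shifts in Finalize), so 31 is the largest value either can hold.
static const unsigned UW_MAX_CODE_WORDS_COUNT = 31;
static const unsigned UW_MAX_EPILOG_COUNT     = 31;

static unsigned AlignUpTo(unsigned size, unsigned align)
{
    return (size + align - 1) & ~(align - 1);
}

int main()
{
    unsigned prologBytes            = 10; // illustrative
    unsigned nonMatchingEpilogBytes = 6;  // one 6-byte epilog that didn't match anything
    unsigned epilogScopes           = 2;  // two epilogs -> two epilog scope words (E bit not set)

    unsigned codeBytes = AlignUpTo(prologBytes + nonMatchingEpilogBytes, 4); // 16
    unsigned codeWords = codeBytes / 4;                                      // 4

    bool needExtended = (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogScopes > UW_MAX_EPILOG_COUNT); // false

    unsigned headerBytes = (1 + (needExtended ? 1 : 0) + epilogScopes) * 4; // 12
    unsigned finalSize   = headerBytes + codeBytes;                         // 28

    printf("codeBytes=%u codeWords=%u headerBytes=%u finalSize=%u\n", codeBytes, codeWords, headerBytes, finalSize);
    return 0;
}
```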
+ + ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes); + + if (epilogCodeBytes != 0) + { + // We need to copy the epilog code bytes to their final memory location + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + if (!pEpi->Matches()) + { + ufiPrologCodes.AppendEpilog(pEpi); + } + } + } + + // Save some data for later + ufiSize = finalSize; + ufiSetEBit = setEBit; + ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount; + ufiCodeWords = codeWords; + ufiEpilogScopes = epilogScopes; +} + +// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header. + +void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (0 && uwiComp->verbose) + { + printf("*************** Before fragment #%d finalize\n", ufiNum); + Dump(); + } +#endif + + // Compute the header + + noway_assert((functionLength & 3) == 0); + DWORD headerFunctionLength = functionLength / 4; + + DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. + DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. + DWORD headerEBit; + DWORD headerEpilogCount; // This depends on how we set headerEBit. + DWORD headerCodeWords; + DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. + DWORD headerExtendedCodeWords = 0; + + if (ufiSetEBit) + { + headerEBit = 1; + headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes! + headerCodeWords = ufiCodeWords; + } + else + { + headerEBit = 0; + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + headerEpilogCount = 0; + headerCodeWords = 0; + headerExtendedEpilogCount = ufiEpilogScopes; + headerExtendedCodeWords = ufiCodeWords; + } + else + { + headerEpilogCount = ufiEpilogScopes; + headerCodeWords = ufiCodeWords; + } + } + + // Start writing the header + + noway_assert(headerFunctionLength <= + 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error + + if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) | + (headerEpilogCount << 22) | (headerCodeWords << 27); + + ufiPrologCodes.AddHeaderWord(header); + + // Construct the second header word, if needed + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + noway_assert(headerEBit == 0); + noway_assert(headerEpilogCount == 0); + noway_assert(headerCodeWords == 0); + noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT)); + + if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16); + + ufiPrologCodes.AddHeaderWord(header2); + } + + // Construct the epilog scope words, if needed + + if (!ufiSetEBit) + { + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + // The epilog must strictly follow the prolog. The prolog is in the first fragment of + // the hot section. If this epilog is at the start of a fragment, it can't be the + // first fragment in the hot section. 
We actually don't know if we're processing + // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus, + // we just assert that the epilog starts within the fragment. + assert(pEpi->GetStartOffset() >= GetStartOffset()); + + // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment, + // NOT the offset from the beginning of the main function. + DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset(); + + noway_assert((headerEpilogStartOffset & 3) == 0); + headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits + // of the actual offset is always zero) + + DWORD headerEpilogStartIndex = pEpi->GetStartIndex(); + + if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) || + (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22); + + ufiPrologCodes.AddHeaderWord(epilogScopeWord); + } + } + + // The unwind code words are already here, following the header, so we're done! +} + +void UnwindFragmentInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH + + MergeCodes(); + + BOOL isColdCode = isHotCode ? FALSE : TRUE; + + ULONG unwindSize = Size(); + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("reserveUnwindInfo: fragment #%d:\n", ufiNum); + } +#endif + + uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize); +} + +// Allocate the unwind info for a fragment with the VM. +// Arguments: +// funKind: funclet kind +// pHotCode: hot section code buffer +// pColdCode: cold section code buffer +// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a +// function/funclet. +// isHotCode: are we allocating the unwind info for the hot code section? + +void UnwindFragmentInfo::Allocate( + CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode) +{ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + // We don't support hot/cold splitting with EH, so if there is cold code, this + // better not be a funclet! + // TODO-CQ: support funclets in cold code + + noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); + + // Compute the final size, and start and end offsets of the fragment + + startOffset = GetStartOffset(); + + if (ufiNext == NULL) + { + // This is the last fragment, so the fragment extends to the end of the function/fragment. + assert(funcEndOffset != 0); + endOffset = funcEndOffset; + } + else + { + // The fragment length is all the code between the beginning of this fragment + // and the beginning of the next fragment. Note that all fragments have had their + // offsets computed before any fragment is allocated. 
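The bit layout assembled by Finalize above can be captured in a short packing sketch; the shift amounts are the ones used in the code, and all field values below are illustrative.

```cpp
#include <cstdio>

int main()
{
    // First header DWORD: Function Length (in 4-byte units), Vers, X, E, Epilog Count, Code Words.
    unsigned functionLength = 0x800; // bytes; must be a multiple of 4
    unsigned vers           = 0;
    unsigned xBit           = 0;
    unsigned eBit           = 0;
    unsigned epilogCount    = 2; // number of epilog scopes when E == 0
    unsigned codeWords      = 4;

    unsigned header = (functionLength / 4) | (vers << 18) | (xBit << 20) | (eBit << 21) | (epilogCount << 22) |
                      (codeWords << 27);

    // Epilog scope word (only present when E == 0): epilog start offset in 4-byte units,
    // plus the index of the epilog's first unwind code.
    unsigned epilogStartOffset = 0x7f0; // byte offset of the epilog within the fragment
    unsigned epilogStartIndex  = 0;     // epilog matched the prolog, so it starts at code index 0

    unsigned epilogScopeWord = (epilogStartOffset / 4) | (epilogStartIndex << 22);

    printf("header word:       0x%08x\n", header);
    printf("epilog scope word: 0x%08x\n", epilogScopeWord);
    return 0;
}
```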
+ endOffset = ufiNext->GetStartOffset(); + } + + assert(endOffset > startOffset); + codeSize = endOffset - startOffset; + + // Finalize the fragment unwind block to hand to the VM + + Finalize(codeSize); + + // Get the final unwind information and hand it to the VM + + ULONG unwindBlockSize; + BYTE* pUnwindBlock; + + GetFinalInfo(&pUnwindBlock, &unwindBlockSize); + +#ifdef DEBUG + if (uwiComp->opts.dspUnwind) + { + DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize); + } +#endif // DEBUG + + // Adjust for cold or hot code: + // 1. The VM doesn't want the cold code pointer unless this is cold code. + // 2. The startOffset and endOffset need to be from the base of the hot section for hot code + // and from the base of the cold section for cold code + + if (isHotCode) + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + pColdCode = NULL; + } + else + { + assert(startOffset >= uwiComp->info.compTotalHotCodeSize); + startOffset -= uwiComp->info.compTotalHotCodeSize; + endOffset -= uwiComp->info.compTotalHotCodeSize; + } + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("unwindEmit: fragment #%d:\n", ufiNum); + } +#endif // DEBUG + + uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock, + funKind); +} + +#ifdef DEBUG +void UnwindFragmentInfo::Dump(int indent) +{ + unsigned count; + UnwindEpilogInfo* pEpi; + + count = 0; + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++count; + } + + printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext)); + printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc)); + printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog)); + printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? "s" : ""); + printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList)); + printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast)); + printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes)); + printf("%*s ufiSize: %u\n", indent, "", ufiSize); + printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit)); + printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount)); + printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords); + printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes); + printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset); + printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog)); + printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized); + + ufiPrologCodes.Dump(indent + 2); + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->Dump(indent + 2); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindInfo +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc) +{ + uwiComp = comp; + + // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated. + // However, its constructor needs to be explicitly called, since the constructor for + // UnwindInfo is not called. 
+
+    new (&uwiFragmentFirst, jitstd::placement_t()) UnwindFragmentInfo(comp, startLoc, false);
+
+    uwiFragmentLast = &uwiFragmentFirst;
+
+    uwiEndLoc = endLoc;
+
+    // Allocate an emitter location object. It is initialized to something
+    // invalid: it has a null 'ig' that needs to get set before it can be used.
+    // Note that when we create an UnwindInfo for the cold section, this never
+    // gets initialized with anything useful, since we never add unwind codes
+    // to the cold section; we simply distribute the existing (previously added) codes.
+    uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation();
+
+#ifdef DEBUG
+    uwiInitialized = UWI_INITIALIZED_PATTERN;
+    uwiAddingNOP   = false;
+#endif // DEBUG
+}
+
+// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi')
+// and those that are in the cold section (move them to 'this'). There is exactly one fragment
+// in each UnwindInfo; the fragments haven't been split for size, yet.
+
+void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi)
+{
+    // Ensure that there is exactly a single fragment in both the hot and the cold sections
+    assert(&uwiFragmentFirst == uwiFragmentLast);
+    assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast);
+    assert(uwiFragmentLast->ufiNext == NULL);
+    assert(puwi->uwiFragmentLast->ufiNext == NULL);
+
+    // The real prolog is in the hot section, so this (cold) section has a phantom prolog
+    uwiFragmentLast->ufiHasPhantomProlog = true;
+    uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast);
+
+    // Now split the epilog codes
+    uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast);
+}
+
+// Split the function or funclet into fragments that are no larger than 512K,
+// so the fragment size will fit in the unwind data "Function Length" field.
+// The unwind data format used here is modeled on the ARM64-style exception data format,
+// whose "Function Fragments" section describes this kind of splitting. We split the function
+// so that no fragment is larger than 512K bytes, or the value of the COMPlus_JitSplitFunctionSize
+// value, if defined (and smaller). We must determine how to split the function/funclet before we
+// issue the instructions, so we can reserve the unwind space with the VM. The instructions issued
+// may shrink (but not expand!) during issuing (although this is extremely rare in any case, and
+// may not actually occur on LoongArch64), so we don't finalize actual sizes or offsets.
+//
+// Note that the "Function Length" encoding used by Finalize (18 bits of 4-byte units) could
+// describe fragments of up to 1MB, so the 512K limit is conservative.
+// TODO-LOONGARCH64-Bug?: make sure this works!
+//
+// We don't split any prolog or epilog. Ideally, we might not split an instruction,
+// although that doesn't matter because the unwind at any point would still be
+// well-defined.
+
+void UnwindInfo::Split()
+{
+    UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes
+
+    maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES;
+
+#ifdef DEBUG
+    // Consider COMPlus_JitSplitFunctionSize
+    unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize();
+
+    if (splitFunctionSize != 0)
+        if (splitFunctionSize < maxFragmentSize)
+            maxFragmentSize = splitFunctionSize;
+#endif // DEBUG
+
+    // Now, there should be exactly one fragment.
+
+    assert(uwiFragmentLast != NULL);
+    assert(uwiFragmentLast == &uwiFragmentFirst);
+    assert(uwiFragmentLast->ufiNext == NULL);
+
+    // Find the code size of this function/funclet.
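For a concrete feel of the split decision described above: with a 512K fragment limit, a 1.25MB function needs three fragments. The round-up division matches the computation that follows (sizes illustrative).

```cpp
#include <cstdio>

// Round-up division used to pick the fragment count, as in UnwindInfo::Split.
static unsigned FragmentCount(unsigned codeSize, unsigned maxFragmentSize)
{
    return (codeSize + maxFragmentSize - 1) / maxFragmentSize;
}

int main()
{
    const unsigned maxFragmentSize = 512 * 1024; // illustrative default limit

    printf("300KB  function -> %u fragment(s)\n", FragmentCount(300 * 1024, maxFragmentSize));  // 1
    printf("1.25MB function -> %u fragment(s)\n", FragmentCount(1280 * 1024, maxFragmentSize)); // 3
    return 0;
}
```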
+ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + if (uwiFragmentLast->ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + startOffset = 0; + } + else + { + startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + if (uwiEndLoc == NULL) + { + // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions + // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks. + // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late + // for us, since we need to decide how many fragments we need before the code memory is allocated + // (which is before instruction issuing). + UNATIVE_OFFSET estimatedTotalCodeSize = + uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize; + assert(estimatedTotalCodeSize != 0); + endOffset = estimatedTotalCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + assert(endOffset > startOffset); // there better be at least 1 byte of code + codeSize = endOffset - startOffset; + + // Now that we know the code size for this section (main function hot or cold, or funclet), + // figure out how many fragments we're going to need. + + UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up + assert(numberOfFragments > 0); + + if (numberOfFragments == 1) + { + // No need to split; we're done + return; + } + + // Now, we're going to commit to splitting the function into "numberOfFragments" fragments, + // for the purpose of unwind information. We need to do the actual splits so we can figure out + // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know + // the actual offsets of the splits since we haven't issued the instructions yet, so store + // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase, + // like we do for the function length and epilog offsets. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (uwiComp->verbose) + { + printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n", + numberOfFragments, codeSize, maxFragmentSize); + } +#endif // DEBUG + + // Call the emitter to do the split, and call us back for every split point it chooses. + uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, + EmitSplitCallback); + +#ifdef DEBUG + // Did the emitter split the function/funclet into as many fragments as we asked for? + // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not + // be fewer if we're splitting into 512K blocks! + + unsigned fragCount = 0; + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++fragCount; + } + if (fragCount < numberOfFragments) + { + if (uwiComp->verbose) + { + printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount); + } + + // If this fires, then we split into fewer fragments than we asked for, and we are using + // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit + // this fragment into the unwind data! 
If you set COMPlus_JitSplitFunctionSize to something + // small, we might not be able to split into as many fragments as asked for, because we + // can't split prologs or epilogs. + assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES); + } +#endif // DEBUG +} + +/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) +{ + UnwindInfo* puwi = (UnwindInfo*)context; + puwi->AddFragment(emitLoc); +} + +// Reserve space for the unwind info for all fragments + +void UnwindInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(isHotCode || !isFunclet); + + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Reserve(isFunclet, isHotCode); + } +} + +// Allocate and populate VM unwind info for all fragments + +void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + + UnwindFragmentInfo* pFrag; + + // First, finalize all the offsets (the location of the beginning of fragments, and epilogs), + // so a fragment can use the finalized offset of the subsequent fragment to determine its code size. + + UNATIVE_OFFSET endOffset; + + if (uwiEndLoc == NULL) + { + assert(uwiComp->info.compNativeCodeSize != 0); + endOffset = uwiComp->info.compNativeCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->FinalizeOffset(); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode); + } +} + +void UnwindInfo::AddEpilog() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + uwiFragmentLast->AddEpilog(); + CaptureLocation(); +} + +void UnwindInfo::CaptureLocation() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiCurLoc != NULL); + uwiCurLoc->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindInfo::AddFragment(emitLocation* emitLoc) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + + UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true); + +#ifdef DEBUG + newFrag->ufiNum = uwiFragmentLast->ufiNum + 1; +#endif // DEBUG + + newFrag->CopyPrologCodes(&uwiFragmentFirst); + newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast); + + // Link the new fragment in at the end of the fragment list + uwiFragmentLast->ufiNext = newFrag; + uwiFragmentLast = newFrag; +} + +#ifdef DEBUG + +void UnwindInfo::Dump(bool isHotCode, int indent) +{ + unsigned count; + UnwindFragmentInfo* pFrag; + + count = 0; + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++count; + } + + printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast)); + printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc)); + printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized); + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Dump(indent + 2); + } +} + +#endif // DEBUG + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 529c6538699f5..3364c84a1d859 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -227,6 +227,17 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regNamesFloat[reg]; +#elif defined(TARGET_LOONGARCH64) + + static const char* regNamesFloat[] = { +#define REGDEF(name, rnum, mask, sname) sname, +#include "register.h" + }; + + assert((unsigned)reg < ArrLen(regNamesFloat)); + + return regNamesFloat[reg]; + #else static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, @@ -316,6 +327,14 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) } #elif defined(TARGET_X86) // No register ranges + +#elif defined(TARGET_LOONGARCH64) + if (REG_A0 <= regNum && regNum <= REG_T8) + { + regHead = regNum; + inRegRange = true; + sep = "-"; + } #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -325,10 +344,12 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // We've already printed a register. Is this the end of a range? else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP -#else // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + else if ((regNum == REG_INT_LAST) || (regNum == REG_A7) || (regNum == REG_T8)) +#else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? else if (regNum == REG_INT_LAST) -#endif // TARGET_ARM64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 27d0059552241..d4833dce961f9 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -49,13 +49,13 @@ struct FloatTraits // Notes: // "Default" NaN value returned by expression 0.0f / 0.0f on x86/x64 has // different binary representation (0xffc00000) than NaN on - // ARM32/ARM64 (0x7fc00000). + // ARM32/ARM64/LoongArch64 (0x7fc00000). static float NaN() { #if defined(TARGET_XARCH) unsigned bits = 0xFFC00000u; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned bits = 0x7FC00000u; #else #error Unsupported or unset target architecture @@ -75,13 +75,13 @@ struct DoubleTraits // Notes: // "Default" NaN value returned by expression 0.0 / 0.0 on x86/x64 has // different binary representation (0xfff8000000000000) than NaN on - // ARM32/ARM64 (0x7ff8000000000000). + // ARM32/ARM64/LoongArch64 (0x7ff8000000000000). static double NaN() { #if defined(TARGET_XARCH) unsigned long long bits = 0xFFF8000000000000ull; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned long long bits = 0x7FF8000000000000ull; #else #error Unsupported or unset target architecture @@ -106,7 +106,7 @@ struct DoubleTraits template TFp FpAdd(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is negative infinity and [value2] is positive infinity // the result is NaN. 
// If [value1] is positive infinity and [value2] is negative infinity @@ -124,7 +124,7 @@ TFp FpAdd(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 + value2; } @@ -142,7 +142,7 @@ TFp FpAdd(TFp value1, TFp value2) template TFp FpSub(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is positive infinity and [value2] is positive infinity // the result is NaN. // If [value1] is negative infinity and [value2] is negative infinity @@ -160,7 +160,7 @@ TFp FpSub(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 - value2; } @@ -178,7 +178,7 @@ TFp FpSub(TFp value1, TFp value2) template TFp FpMul(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [value1] is zero and [value2] is infinity @@ -194,7 +194,7 @@ TFp FpMul(TFp value1, TFp value2) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 * value2; } @@ -212,7 +212,7 @@ TFp FpMul(TFp value1, TFp value2) template TFp FpDiv(TFp dividend, TFp divisor) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [dividend] is zero and [divisor] is zero @@ -228,7 +228,7 @@ TFp FpDiv(TFp dividend, TFp divisor) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return dividend / divisor; } @@ -9767,7 +9767,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet())); } -#if defined(FEATURE_READYTORUN) && defined(TARGET_ARMARCH) +#if defined(FEATURE_READYTORUN) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (call->IsR2RRelativeIndir()) { #ifdef DEBUG @@ -9782,7 +9782,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN // in morph. So we do not need to use EntryPointAddrAsArg0, because arg0 is already an entry point addr. useEntryPointAddrAsArg0 = false; } -#endif // FEATURE_READYTORUN && TARGET_ARMARCH +#endif // FEATURE_READYTORUN && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (nArgs == 0) { diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index cc88c400a5a33..32f17c685e137 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -179,6 +179,10 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #elif defined (TARGET_ARM) // No Hardware Intrinsics on ARM32 + +#elif defined (TARGET_LOONGARCH64) + //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. + #else #error Unsupported platform #endif
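A standalone sketch of the cross-platform constant-folding concern addressed in valuenum.cpp above: when folding infinity arithmetic for ARM32/ARM64/LoongArch64 targets, the JIT must produce the target's "default NaN" bit pattern (0x7fc00000 for float) rather than whatever NaN the host FPU happens to produce. Illustrative only; it does not use the JIT's FloatTraits types.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

// Fold value1 + value2 the way FpAdd does for ARM/LoongArch64 targets: if the operands are
// opposite infinities, return the target's default NaN instead of relying on the host FPU.
static float FoldAddForArmLikeTarget(float value1, float value2)
{
    const float inf = std::numeric_limits<float>::infinity();
    if ((value1 == inf && value2 == -inf) || (value1 == -inf && value2 == inf))
    {
        uint32_t bits = 0x7FC00000u; // default NaN on ARM32/ARM64/LoongArch64
        float    result;
        memcpy(&result, &bits, sizeof(result));
        return result;
    }
    return value1 + value2;
}

int main()
{
    float r = FoldAddForArmLikeTarget(std::numeric_limits<float>::infinity(),
                                      -std::numeric_limits<float>::infinity());

    uint32_t bits;
    memcpy(&bits, &r, sizeof(bits));
    printf("+inf + -inf folds to bit pattern 0x%08x\n", bits);
    return 0;
}
```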