From 7d0932691cf99c23a23b0bb5ce9a25b948bd7217 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 5 Mar 2024 15:03:08 +0800 Subject: [PATCH] [LoongArch64] redesign the frame layout to avoid the redundant computation of the stackOffsets. After the `lvaAssignVirtualFrameOffsetsToLocals`, there is no need to recompute the LclVarDsc's StackOffset within the `lvaFixVirtualFrameOffsets`. --- src/coreclr/jit/codegen.h | 2 - src/coreclr/jit/codegencommon.cpp | 35 +- src/coreclr/jit/codegenloongarch64.cpp | 523 +++++++++---------------- src/coreclr/jit/compiler.cpp | 14 +- src/coreclr/jit/lclvars.cpp | 127 +++++- 5 files changed, 344 insertions(+), 357 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7a2359c9fb5fc9..396549bf356664 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -442,7 +442,6 @@ class CodeGen final : public CodeGenInterface int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiCalleeSavedPadding; // CalleeSaved offset padding (positive) int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) int fiSpDelta; // Stack pointer delta (negative) @@ -1281,7 +1280,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // TODO: refactor for LA. void genCodeForJumpCompare(GenTreeOpCC* tree); #endif #if defined(TARGET_ARM64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index bf2bfaa28aad47..7e7bea633f3611 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4930,7 +4930,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals() GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset); -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#elif defined(TARGET_ARM64) || defined(TARGET_RISCV64) // Patchpoint offset is from top of Tier0 frame // @@ -4956,6 +4956,39 @@ void CodeGen::genEnregisterOSRArgsAndLocals() // | | | | const int offset = stkOffs + tier0FrameSize + osrFrameSize - osrSpToFpDelta; + JITDUMP("---OSR--- V%02u (reg) Tier0 virtual offset %d OSR frame size %d OSR sp-fp " + "delta %d total offset %d (0x%x)\n", + varNum, stkOffs, osrFrameSize, osrSpToFpDelta, offset, offset); + + genInstrWithConstant(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset, initReg); + *pInitRegZeroed = false; + +#elif defined(TARGET_LOONGARCH64) + + // Patchpoint offset is from top of Tier0 frame + // + // We need to determine the frame-pointer relative + // offset for this variable in the osr frame. + // + // First get the fp's relative offset within Tier0 frame + // + const int tier0FrameOffset = compiler->info.compPatchpointInfo->CalleeSaveRegisters(); + + // then add the OSR frame size + // + const int osrFrameSize = genTotalFrameSize(); + + // then subtract OSR SP-FP delta + // + const int osrSpToFpDelta = genSPtoFPdelta(); + + // | => tier0 top of frame relative + // | + => tier0's fp relative offset + // | | + => osr bottom of frame (sp) relative + // | | | - => osr fp relative + // | | | | + const int offset = stkOffs + tier0FrameOffset + osrFrameSize - osrSpToFpDelta; + JITDUMP("---OSR--- V%02u (reg) Tier0 virtual offset %d OSR frame size %d OSR sp-fp " "delta %d total offset %d (0x%x)\n", varNum, stkOffs, osrFrameSize, osrSpToFpDelta, offset, offset); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 3d7547fdac90b9..c418da580f29e9 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -190,8 +190,7 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* // reg1 - First register of pair to save. // reg2 - Second register of pair to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// spDelta - Always zero for LoongArch64 now. // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This // allows us to emit the "save_next" unwind code. // tmpReg - An available temporary register. Needed for the case of large frames. @@ -210,8 +209,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta <= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD @@ -221,16 +219,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, ins = INS_fst_d; } - if (spDelta != 0) - { - // generate addi.d SP,SP,-imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - - assert((spDelta + spOffset + 16) <= 0); - - assert(spOffset <= 2031); // 2047-16 - } - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); @@ -249,8 +237,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // Arguments: // reg1 - Register to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// spDelta - Always zero for LoongArch64 now. // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. @@ -261,8 +248,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta <= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) @@ -270,12 +256,6 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum ins = INS_fst_d; } - if (spDelta != 0) - { - // generate addi.d SP,SP,-imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); } @@ -290,8 +270,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. // spOffset - The offset from SP to load reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// spDelta - Always zero for LoongArch64 now. // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This // allows us to emit the "save_next" unwind code. // tmpReg - An available temporary register. Needed for the case of large frames. @@ -310,8 +289,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta >= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD @@ -321,27 +299,11 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, ins = INS_fld_d; } - if (spDelta != 0) - { - assert(!useSaveNextPair); - - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); - compiler->unwindSaveReg(reg2, spOffset + 8); - - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - - // generate addi.d SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - else - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); - compiler->unwindSaveReg(reg2, spOffset + 8); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ @@ -350,8 +312,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, // Arguments: // reg1 - Register to restore. // spOffset - The offset from SP to restore reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// spDelta - Always zero for LoongArch64 now. // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. @@ -362,8 +323,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta >= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) @@ -371,20 +331,8 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg ins = INS_fld_d; } - if (spDelta != 0) - { - // ld.d reg1,SP,offset - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - - // generate addi.d SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - else - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ @@ -520,11 +468,12 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) // // Arguments: // regsMask - a mask of registers for prolog generation; -// spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); +// spDelta - Always zero for LoongArch64 now. // spOffset - the offset from SP that is the beginning of the callee-saved register area; // void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { + assert(spDelta == 0); const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); @@ -536,7 +485,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i if (regPair.reg2 != REG_NA) { // We can use two SD instructions. - genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21, + genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, 0, regPair.useSaveNextPair, REG_R21, nullptr); spOffset += 2 * slotSize; @@ -544,11 +493,9 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i else { // No register pair; we use a SD instruction. - genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr); + genPrologSaveReg(regPair.reg1, spOffset, 0, REG_R21, nullptr); spOffset += slotSize; } - - spDelta = 0; // We've now changed SP already, if necessary; don't do it again. } } @@ -574,34 +521,22 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // // Arguments: // regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing. -// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that -// if non-zero spDelta, then this is the offset of the first save *after* that -// SP adjustment. -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// spDelta - Always zero for LoongArch64 now. // // Notes: // The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers. // void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { - assert(spDelta <= 0); + assert(spDelta == 0); - unsigned regsToSaveCount = genCountBits(regsToSaveMask); - if (regsToSaveCount == 0) + if (regsToSaveMask == 0) { - if (spDelta != 0) - { - // Currently this is the case for varargs only - // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. - genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } return; } - assert((spDelta % 16) == 0); - - assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED)); + assert(genCountBits(regsToSaveMask) <= genCountBits(RBM_CALLEE_SAVED)); // Save integer registers at higher addresses than floating-point registers. @@ -610,15 +545,13 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe if (maskSaveRegsFloat != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); - spDelta = 0; + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, 0, lowestCalleeSavedOffset); lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsInt != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); - // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, 0, lowestCalleeSavedOffset); } } @@ -627,11 +560,12 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsMask - a mask of registers for epilog generation; -// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); +// spDelta - Always zero for LoongArch64 now. // spOffset - the offset from SP that is the beginning of the callee-saved register area; // void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { + assert(spDelta == 0); const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); @@ -640,15 +574,6 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta int stackDelta = 0; for (int i = 0; i < regStack.Height(); ++i) { - bool lastRestoreInTheGroup = (i == regStack.Height() - 1); - bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); - if (updateStackDelta) - { - // Update stack delta only if it is the last restore (the first save). - assert(stackDelta == 0); - stackDelta = spDelta; - } - RegPair regPair = regStack.Top(i); if (regPair.reg2 != REG_NA) { @@ -672,8 +597,7 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// spDelta - Always zero for LoongArch64 now. // // Here's an example restore sequence: // ld.d s8,sp,#xxx @@ -694,23 +618,15 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) { - assert(spDelta >= 0); - unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); - if (regsToRestoreCount == 0) + assert(spDelta == 0); + if (regsToRestoreMask == 0) { - if (spDelta != 0) - { - // Currently this is the case for varargs only - // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. - genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } return; } - assert((spDelta % 16) == 0); - - // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. - assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + // The FP and RA are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED)); // Point past the end, to start. We predecrement to find the offset to load from. static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); @@ -725,15 +641,13 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskRestoreRegsInt != RBM_NONE) { - int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, 0, spOffset); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { - // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, 0, spOffset); // No need to update spOffset since it's not used after this. } } @@ -771,20 +685,16 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | incoming arguments | * +=======================+ <---- Caller's SP - * | OSR padding | // If required - * |-----------------------| - * | Varargs regs space | // Only for varargs main functions; 64 bytes + * | Varargs regs space | // Only for varargs main functions; not used for LA64. * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) * |-----------------------| - * ~ alignment padding ~ // To make the whole frame 16 byte aligned + * |Callee saved registers | // multiple of 8 bytes, not includting FP/RA * |-----------------------| * | Saved FP, RA | // 16 bytes * |-----------------------| - * |Callee saved registers | // multiple of 8 bytes, not includting FP/RA - * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP * | | | @@ -910,42 +820,41 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_A0; } - regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsSavedSize = (compiler->compCalleeRegsPushed - 2) << 3; + regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; + int SP_to_CalleeSaved_delta = genFuncletInfo.fiSP_to_CalleeSaved_delta; + int FP_offset = SP_to_CalleeSaved_delta - 16; - int SP_to_CalleeSaved_delta = genFuncletInfo.fiSP_to_CalleeSaved_delta; - if ((SP_to_CalleeSaved_delta + regsSavedSize + genFuncletInfo.fiCalleeSavedPadding) <= 2040) - { - SP_to_CalleeSaved_delta += genFuncletInfo.fiCalleeSavedPadding; + if ((SP_to_CalleeSaved_delta + (compiler->compCalleeRegsPushed << 3)) <= (2040 + 16)) + { genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); genSaveCalleeSavedRegistersHelp(maskSaveRegs, SP_to_CalleeSaved_delta, 0); - SP_to_CalleeSaved_delta += regsSavedSize; - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, SP_to_CalleeSaved_delta); - compiler->unwindSaveReg(REG_RA, SP_to_CalleeSaved_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, SP_to_CalleeSaved_delta + 8); - compiler->unwindSaveReg(REG_FP, SP_to_CalleeSaved_delta + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); } else { assert(frameSize < -2040); - int SP_delta = frameSize + SP_to_CalleeSaved_delta; - genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + SP_to_CalleeSaved_delta = FP_offset & -16; + FP_offset &= 0xf; + + genStackPointerAdjustment(frameSize + SP_to_CalleeSaved_delta, REG_R21, nullptr, true); - genSaveCalleeSavedRegistersHelp(maskSaveRegs, genFuncletInfo.fiCalleeSavedPadding, 0); - regsSavedSize += genFuncletInfo.fiCalleeSavedPadding; + genSaveCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, regsSavedSize); - compiler->unwindSaveReg(REG_RA, regsSavedSize); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, regsSavedSize + 8); - compiler->unwindSaveReg(REG_FP, regsSavedSize + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - genStackPointerAdjustment(-SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(-SP_to_CalleeSaved_delta, REG_R21, nullptr, true); } // This is the end of the OS-reported prolog for purposes of unwinding @@ -1013,40 +922,31 @@ void CodeGen::genFuncletEpilog() assert(frameSize < 0); regMaskTP regsToRestoreMask = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsRestoreSize = (compiler->compCalleeRegsPushed - 2) << 3; - int SP_to_CalleeSaved_delta = genFuncletInfo.fiSP_to_CalleeSaved_delta; - if ((SP_to_CalleeSaved_delta + regsRestoreSize + genFuncletInfo.fiCalleeSavedPadding) <= 2040) + int FP_offset = SP_to_CalleeSaved_delta - 16; + + if ((SP_to_CalleeSaved_delta + (compiler->compCalleeRegsPushed << 3)) > (2040 + 16)) { - SP_to_CalleeSaved_delta += genFuncletInfo.fiCalleeSavedPadding; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, SP_to_CalleeSaved_delta, 0); - SP_to_CalleeSaved_delta += regsRestoreSize; + assert(frameSize < -2040); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, SP_to_CalleeSaved_delta); - compiler->unwindSaveReg(REG_RA, SP_to_CalleeSaved_delta); + int SP_delta = FP_offset & -16; - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, SP_to_CalleeSaved_delta + 8); - compiler->unwindSaveReg(REG_FP, SP_to_CalleeSaved_delta + 8); + FP_offset = FP_offset & 0xf; + SP_to_CalleeSaved_delta = FP_offset + 16; - genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + frameSize += SP_delta; } - else - { - assert(frameSize < -2040); - genStackPointerAdjustment(SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, SP_to_CalleeSaved_delta, 0); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiCalleeSavedPadding, 0); - regsRestoreSize += genFuncletInfo.fiCalleeSavedPadding; + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, regsRestoreSize); - compiler->unwindSaveReg(REG_RA, regsRestoreSize); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, regsRestoreSize + 8); - compiler->unwindSaveReg(REG_FP, regsRestoreSize + 8); - - genStackPointerAdjustment(-frameSize - SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); compiler->unwindReturn(REG_RA); @@ -1070,9 +970,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { return; } - assert(isFramePointerUsed()); - // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); @@ -1080,58 +978,43 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0; - // Because a method and funclets must have the same caller-relative PSPSym offset, // if there is a PSPSym, we have to pad the funclet frame size for OSR. // - unsigned osrPad = 0; - if (compiler->opts.IsOSR() && (PSPSize > 0)) + int osrPad = 0; + if (compiler->opts.IsOSR()) { - osrPad = compiler->info.compPatchpointInfo->TotalFrameSize(); + osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); // OSR pad must be already aligned to stack size. assert((osrPad % STACK_ALIGN) == 0); } - genFuncletInfo.fiCalleeSavedPadding = 0; - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; - unsigned regsSavedSize = genCountBits(rsMaskSaveRegs) << 3; - assert(genCountBits(rsMaskSaveRegs) == compiler->compCalleeRegsPushed); - - unsigned saveRegsPlusPSPSize = regsSavedSize + PSPSize; - - assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); - unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); - - unsigned funcletFrameSize = osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; - unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); - - int SP_to_CalleeSaved_delta = compiler->lvaOutgoingArgSpaceSize; - if ((SP_to_CalleeSaved_delta + regsSavedSize) >= 2040) + int funcletFrameSizeAligned = genTotalFrameSize(); + int delta_PSP = -8; + int SP_to_CalleeSaved_delta = compiler->compLclFrameSize; + if (compiler->lvaPSPSym == BAD_VAR_NUM) { - int offset = funcletFrameSizeAligned - SP_to_CalleeSaved_delta; - SP_to_CalleeSaved_delta = AlignUp((UINT)offset, STACK_ALIGN); - - genFuncletInfo.fiCalleeSavedPadding = SP_to_CalleeSaved_delta - offset; + SP_to_CalleeSaved_delta += TARGET_POINTER_SIZE; } - - if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR()) + if (compiler->lvaMonAcquired == BAD_VAR_NUM) + { + SP_to_CalleeSaved_delta += TARGET_POINTER_SIZE; + } + else { - // We furthermore allocate the "monitor acquired" bool between PSP and - // the saved registers because this is part of the EnC header. - // Note that OSR methods reuse the monitor bool created by tier 0. - osrPad += compiler->lvaLclSize(compiler->lvaMonAcquired); + delta_PSP += -8; } /* Now save it for future use */ - genFuncletInfo.fiSpDelta = -(int)funcletFrameSizeAligned; + genFuncletInfo.fiSpDelta = -funcletFrameSizeAligned; genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSP_to_CalleeSaved_delta = SP_to_CalleeSaved_delta; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSizeAligned - osrPad - 8; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = -(int)osrPad - 8; + genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSizeAligned + delta_PSP; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; #ifdef DEBUG if (verbose) @@ -1149,7 +1032,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } - assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); + assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 16); // Always above the ra/fp. if (compiler->lvaPSPSym != BAD_VAR_NUM) { @@ -4323,7 +4206,15 @@ int CodeGenInterface::genSPtoFPdelta() const assert(isFramePointerUsed()); assert(compiler->compCalleeRegsPushed >= 2); - int delta = compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3) - 8; + int delta = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + delta -= TARGET_POINTER_SIZE; + } + if (compiler->lvaMonAcquired != BAD_VAR_NUM) + { + delta -= TARGET_POINTER_SIZE; + } assert(delta >= 0); return delta; @@ -7702,8 +7593,8 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * ... * st.d s8,sp,off2+8*8 * - * st.d ra,sp,off3 - * st.d fp,sp,off3+8 + * st.d ra,sp,off3+8 + * st.d fp,sp,off3 * * Notes: * 1. FP is always saved, and the first store is FP, RA. @@ -7711,9 +7602,9 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 3. For frames with varargs, not implemented completely and not tested ! * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * - * For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top - * of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same - * rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * For functions with GS and localloc, we had saved the frame pointer and RA at the top + * of the frame. Note that the funclet frames must follow the same rule, + * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. * * The frames look like the following (simplified to only include components that matter for establishing the @@ -7722,26 +7613,31 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * * The LoongArch64's frame layout is liking: * + * If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + * (FP and RA) are protected from buffer overrun by the GS cookie. + * So we always save the FP/RA along with the rest of the callee-saved registers above. + * * | | * |-----------------------| * | incoming arguments | * +=======================+ <---- Caller's SP - * | Arguments Or | // if needed. * | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) + * | PSPSym | // 8 bytes, Only for frames with EH, (omitted in NativeAOT ABI) * |-----------------------| - * | locals, temps, etc. | + * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| - * | possible GS cookie | + * | Saved RA | // 8 bytes * |-----------------------| * | Saved FP | // 8 bytes * |-----------------------| - * | Saved RA | // 8 bytes + * | possible GS cookie | * |-----------------------| - * |Callee saved registers | // not including FP/RA; multiple of 8 bytes + * | locals, temps, etc. | + * |-----------------------| + * | possible GS cookie | * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP @@ -7790,6 +7686,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe regSet.rsMaskCalleeSaved = rsPushRegs | RBM_FPBASE | RBM_RA; #ifdef DEBUG + JITDUMP("Frame info. #outsz=%d; #framesz=%d; LclFrameSize=%d;\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), compiler->compLclFrameSize); + if (compiler->compCalleeRegsPushed != genCountBits(regSet.rsMaskCalleeSaved)) { printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", @@ -7812,84 +7711,52 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG - // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we - // generate based on various sizes. - int frameType = 0; - - // The amount to add from SP before starting to store the callee-saved registers. - int calleeSaveSPDelta = 0; - - // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address - // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, - // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will - // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our - // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above - // the GS cookie. - // - // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to - // create a frame pointer chain. - // + int totalFrameSize = genTotalFrameSize(); + int leftFrameSize = 0; + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if (compiler->lvaMonAcquired != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } - // This will be the starting place for saving the callee-saved registers, in increasing order. - int offset = compiler->lvaOutgoingArgSpaceSize; +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("Frame info. #outsz=%d; #framesz=%d; lcl=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), localFrameSize); + } +#endif - int totalFrameSize = genTotalFrameSize(); - // The (totalFrameSize <= 2040) condition ensures the offsets of st.d/ld.d. + int FP_offset = localFrameSize; if (totalFrameSize <= 2040) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); compiler->unwindAllocStack(totalFrameSize); - - // Case #1. - // - // Generate: - // addi.d sp, sp, -framesz - // st.d callee_saved_registers ### not including the fp and ra. - // st.d ra,sp,outsz - // st.d fp,sp,outsz+8 - // - // After saving callee-saved registers, ra and fp, we establish the frame pointer with: - // addi.d fp, sp, (the offset of saving fp) - // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); - - frameType = 1; } else { - JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); - - frameType = 2; - - if ((offset + (compiler->compCalleeRegsPushed << 3)) >= 2040) - { - offset = totalFrameSize - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - - genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - genStackPointerAdjustment(-totalFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); + leftFrameSize = localFrameSize & -16; + totalFrameSize = totalFrameSize - (localFrameSize & -16); + FP_offset = localFrameSize & 0xf; } + genStackPointerAdjustment(-totalFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); } + genSaveCalleeSavedRegistersHelp(rsPushRegs, FP_offset + 16, 0); - JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); - genSaveCalleeSavedRegistersHelp(rsPushRegs, offset, 0); - offset += (int)(genCountBits(rsPushRegs) << 3); - - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset); - compiler->unwindSaveReg(REG_RA, offset); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset + 8); - compiler->unwindSaveReg(REG_FP, offset + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - JITDUMP(" offsetSpToSavedFp=%d\n", offset + 8); - genEstablishFramePointer(offset + 8, /* reportUnwindData */ true); + JITDUMP(" offsetSpToSavedFp=%d\n", FP_offset); + genEstablishFramePointer(FP_offset, /* reportUnwindData */ true); // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't @@ -7900,19 +7767,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); } -#ifdef DEBUG - if (compiler->opts.disAsm) - { - assert(frameType != 0); - printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); - } -#endif - - if (calleeSaveSPDelta != 0) + if (leftFrameSize != 0) { - assert(frameType == 2); - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + genStackPointerAdjustment(-leftFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); } } @@ -7924,76 +7781,68 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) assert(isFramePointerUsed()); - // This will be the starting place for restoring the callee-saved registers, in decreasing order. - int calleeSaveSPOffset = 0; - int remainingSPSize = 0; - int totalFrameSize = genTotalFrameSize(); - if (totalFrameSize <= 2040) + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if (compiler->lvaMonAcquired != BAD_VAR_NUM) { - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); + localFrameSize -= TARGET_POINTER_SIZE; + } + + JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + int FP_offset = localFrameSize; + int remainingSPSize = totalFrameSize; + if (totalFrameSize <= 2040) + { if (compiler->compLocallocUsed) { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; - + int SPtoFPdelta = genSPtoFPdelta(); // Restore sp from fp GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - remainingSPSize = totalFrameSize; } else { - JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) > 2047) + if (compiler->compLocallocUsed) { - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize & -16; - if (compiler->compLocallocUsed) + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + if (emitter::isValidSimm12(SPtoFPdelta)) { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8; - - // Restore sp from fp GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } else { - genStackPointerAdjustment(calleeSaveSPOffset, REG_RA, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_RA, SPtoFPdelta); + GetEmitter()->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, REG_RA); } - remainingSPSize = totalFrameSize - calleeSaveSPOffset; - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize - calleeSaveSPOffset; } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - if (compiler->compLocallocUsed) - { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; + remainingSPSize = localFrameSize & -16; + genStackPointerAdjustment(remainingSPSize, REG_RA, nullptr, /* reportUnwindData */ true); - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); - } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - remainingSPSize = totalFrameSize; + remainingSPSize = totalFrameSize - remainingSPSize; + FP_offset = localFrameSize & 0xf; } } - JITDUMP(" calleeSaveSPOffset=%d\n", calleeSaveSPOffset); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, 0); - calleeSaveSPOffset += (compiler->compCalleeRegsPushed - 2) << 3; + JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16, 0); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset); - compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset + 8); - compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); if (emitter::isValidUimm11(remainingSPSize)) { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2bba7db9ae7ae5..600378030de970 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5699,12 +5699,18 @@ void Compiler::generatePatchpointInfo() // const int totalFrameSize = codeGen->genTotalFrameSize() + TARGET_POINTER_SIZE; const int offsetAdjust = 0; -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#elif defined(TARGET_ARM64) || defined(TARGET_RISCV64) // SP is not manipulated by calls so no frame size adjustment needed. // Local Offsets may need adjusting, if FP is at bottom of frame. // const int totalFrameSize = codeGen->genTotalFrameSize(); const int offsetAdjust = codeGen->genSPtoFPdelta() - totalFrameSize; +#elif defined(TARGET_LOONGARCH64) + // SP is not manipulated by calls so no frame size adjustment needed. + // Local Offsets don't need adjusting for LoongArch64. + // + const int totalFrameSize = codeGen->genTotalFrameSize(); + const int offsetAdjust = 0; #else NYI("patchpoint info generation"); const int offsetAdjust = 0; @@ -5796,6 +5802,12 @@ void Compiler::generatePatchpointInfo() JITDUMP("--OSR-- Tier0 callee saves: "); JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); JITDUMP("\n"); +#elif defined(TARGET_LOONGARCH64) + // NOTE: + // The callee saved registers include the FP/RA here!!! + // We save the first callee saved register's offset, which is the FP's offset. + patchpointInfo->SetCalleeSaveRegisters(codeGen->genSPtoFPdelta()); + JITDUMP("--OSR-- Tier0 callee saved Offset: %d \n", patchpointInfo->CalleeSaveRegisters()); #endif // Register this with the runtime. diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 55787580cc1df7..bedb2916bee9c3 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5215,18 +5215,20 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if !defined(TARGET_LOONGARCH64) + // For LoongArch64, there is no need to recompute the offset. if (opts.IsOSR()) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) // Stack offset includes Tier0 frame. // JITDUMP("--- delta bump %d for OSR + Tier0 frame\n", info.compPatchpointInfo->TotalFrameSize()); @@ -5318,6 +5320,7 @@ void Compiler::lvaFixVirtualFrameOffsets() } lvaCachedGenericContextArgOffs += delta; +#endif #if FEATURE_FIXED_OUT_ARGS @@ -5331,23 +5334,24 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // We normally add alignment below the locals between them and the outgoing // arg space area. When we store fp/lr(ra) at the bottom, however, this will // be below the alignment. So we should not apply the alignment adjustment to // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. + // For LoongArch64, the RA is above the fp. see the `genPushCalleeSavedRegisters` assert(codeGen->isFramePointerUsed()); if (lvaRetAddrVar != BAD_VAR_NUM) { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#elif defined(TARGET_RISCV64) assert(codeGen->isFramePointerUsed()); if (lvaRetAddrVar != BAD_VAR_NUM) { - // For LoongArch64 and RISCV64, the RA is below the fp. see the `genPushCalleeSavedRegisters` + // For RISCV64, the RA is below the fp. see the `genPushCalleeSavedRegisters` lvaTable[lvaRetAddrVar].SetStackOffset(-REGSIZE_BYTES); } #endif // !TARGET_LOONGARCH64 @@ -5987,10 +5991,27 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, { argOffs = roundUp(argOffs, argAlignment); } + int tmpOffs = argOffs; + +#if defined(TARGET_LOONGARCH64) + tmpOffs += compCalleeRegsPushed << 3; + if (lvaPSPSym != BAD_VAR_NUM) + { + tmpOffs += TARGET_POINTER_SIZE; + } + if (opts.IsOSR()) + { + tmpOffs += info.compPatchpointInfo->TotalFrameSize(); + } + else if (lvaMonAcquired != BAD_VAR_NUM) + { + tmpOffs += TARGET_POINTER_SIZE; + } +#endif assert((argSize % argAlignment) == 0); - assert((argOffs % argAlignment) == 0); - varDsc->SetStackOffset(argOffs); + assert((tmpOffs % argAlignment) == 0); + varDsc->SetStackOffset(tmpOffs); } // For struct promoted parameters we need to set the offsets for both LclVars. @@ -6080,9 +6101,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // if (opts.IsOSR()) { +#if defined(TARGET_LOONGARCH64) + originalFrameStkOffs = (compCalleeRegsPushed << 3) + info.compPatchpointInfo->CalleeSaveRegisters(); +#else originalFrameSize = info.compPatchpointInfo->TotalFrameSize(); originalFrameStkOffs = stkOffs; stkOffs -= originalFrameSize; +#endif } #ifdef TARGET_XARCH @@ -6135,12 +6160,16 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() assert(compCalleeRegsPushed >= 2); stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } +#elif defined(TARGET_LOONGARCH64) -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + assert(compCalleeRegsPushed >= 2); + stkOffs = (compCalleeRegsPushed << 3); + +#elif defined(TARGET_RISCV64) assert(compCalleeRegsPushed >= 2); -#else // !TARGET_LOONGARCH64 && !TARGET_RISCV64 +#else // !TARGET_RISCV64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6151,7 +6180,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64 +#endif // !TARGET_RISCV64 // (2) Account for the remainder of the frame // @@ -6237,6 +6266,59 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_AMD64 +#if defined(TARGET_LOONGARCH64) + // For LoongArch64, after the `lvaAssignVirtualFrameOffsetsToLocals`, there is no need + // to recompute the LclVarDsc's StackOffset within the `lvaFixVirtualFrameOffsets`. + assert(codeGen->isFramePointerUsed()); + int PSP_MonAcquire_Size = 0; +#if FEATURE_EH_FUNCLETS + if (lvaPSPSym != BAD_VAR_NUM) + { + // If we need a PSPSym, allocate it first, before anything else, including + // padding (so we can avoid computing the same padding in the funclet + // frame). Note that there is no special padding requirement for the PSPSym. + noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs + TARGET_POINTER_SIZE); + PSP_MonAcquire_Size += TARGET_POINTER_SIZE; + } +#endif // FEATURE_EH_FUNCLETS + + if (lvaMonAcquired != BAD_VAR_NUM) + { + // For OSR we use the flag set up by the original method. + if (opts.IsOSR()) + { + originalFrameStkOffs += lvaLclSize(lvaMonAcquired); + assert(info.compPatchpointInfo->HasMonitorAcquired()); + int originalOffset = info.compPatchpointInfo->MonitorAcquiredOffset(); + int offset = originalFrameStkOffs + PSP_MonAcquire_Size + originalOffset; + + JITDUMP( + "---OSR--- V%02u (on tier0 frame, monitor acquired) tier0 FP-rel offset %d tier0 frame offset %d new " + "virt offset %d\n", + lvaMonAcquired, originalOffset, originalFrameStkOffs + PSP_MonAcquire_Size, offset); + + lvaTable[lvaMonAcquired].SetStackOffset(offset); + } + else + { + // This var must go first, in what is called the 'frame header' for EnC so that it is + // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame + // layout requirements for EnC to work. + stkOffs = stkOffs + PSP_MonAcquire_Size + lvaLclSize(lvaMonAcquired); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs); + stkOffs -= PSP_MonAcquire_Size; + PSP_MonAcquire_Size += lvaLclSize(lvaMonAcquired); + } + } + if (opts.IsOSR()) + { + originalFrameStkOffs += PSP_MonAcquire_Size; + originalFrameSize = originalFrameStkOffs; + } + stkOffs = stkOffs - (compCalleeRegsPushed << 3); + +#else if (lvaMonAcquired != BAD_VAR_NUM) { // For OSR we use the flag set up by the original method. @@ -6262,8 +6344,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs); } } +#endif -#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets @@ -6272,7 +6355,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64) +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_RISCV64) if (mustDoubleAlign) { @@ -6889,8 +6972,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // FEATURE_FIXED_OUT_ARGS -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For LoongArch64 and RISCV64, CalleeSavedRegs are at bottom. +#if defined(TARGET_LOONGARCH64) + noway_assert(compLclFrameSize + originalFrameSize == (unsigned)(originalFrameStkOffs + PSP_MonAcquire_Size - stkOffs)); + +#else + +#if defined(TARGET_RISCV64) + // For RISCV64, CalleeSavedRegs are at bottom. int pushedCount = 0; #else // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address @@ -6915,6 +7003,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(compLclFrameSize + originalFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); +#endif } int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs) @@ -6944,8 +7033,14 @@ int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, i #endif )) { - // Note that stack offsets are negative or equal to zero +#if !defined(TARGET_LOONGARCH64) + // Note that stack offsets are negative or equal to zero except the LoongArch64. + // + // But for LoongArch64, after the `lvaAssignVirtualFrameOffsetsToLocals`, there is no need + // to recompute the LclVarDsc's StackOffset within the `lvaFixVirtualFrameOffsets`. + // And the offsets of the callee saved registers, PSPSym and MonitorAcquired are positive. assert(stkOffs <= 0); +#endif // alignment padding unsigned pad = 0;