diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 8c966bb3403b5..34b3843d6893e 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -75,6 +75,10 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 18a6650afc37e..13a7e4629e353 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -745,7 +745,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" // // Hardware Intrinsic ISAs // +#if defined(TARGET_LOONGARCH64) +//TODO: should implement LoongArch64's features. +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") +#else RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") +#endif // defined(TARGET_LOONGARCH64) #if defined(TARGET_AMD64) || defined(TARGET_X86) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index 63a48d0e4ceea..1a7fdb37b9c25 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -399,7 +399,7 @@ enum #define CONTEXT_UNWOUND_TO_CALL 0x20000000 -typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { +typedef struct DECLSPEC_ALIGN(8) _T_CONTEXT { // // Control flags. @@ -414,8 +414,8 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { DWORD64 Ra; DWORD64 Tp; DWORD64 Sp; - DWORD64 A0;//DWORD64 V0; - DWORD64 A1;//DWORD64 V1; + DWORD64 A0; + DWORD64 A1; DWORD64 A2; DWORD64 A3; DWORD64 A4; @@ -447,7 +447,7 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // // Floating Point Registers // - //TODO: support the SIMD. + //TODO-LoongArch64: support the SIMD. DWORD64 F[32]; DWORD Fcsr; } T_CONTEXT, *PT_CONTEXT; @@ -469,7 +469,6 @@ typedef struct _T_RUNTIME_FUNCTION { }; } T_RUNTIME_FUNCTION, *PT_RUNTIME_FUNCTION; - // // Define exception dispatch context structure. // @@ -489,8 +488,6 @@ typedef struct _T_DISPATCHER_CONTEXT { PBYTE NonVolatileRegisters; } T_DISPATCHER_CONTEXT, *PT_DISPATCHER_CONTEXT; - - // // Nonvolatile context pointer record. // diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h index 2ab9c62c3e844..40fe2d1d3a2d1 100644 --- a/src/coreclr/inc/palclr.h +++ b/src/coreclr/inc/palclr.h @@ -606,4 +606,8 @@ #include "palclr_win.h" +#ifndef IMAGE_FILE_MACHINE_LOONGARCH64 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. 
+#endif + #endif // defined(HOST_WINDOWS) diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index b2d1c06a22d66..9025a8608af0f 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -41,27 +41,38 @@ class TargetArchitecture static const bool IsArm64 = false; static const bool IsArm32 = true; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_ARM64) static const bool IsX86 = false; static const bool IsX64 = false; static const bool IsArm64 = true; static const bool IsArm32 = false; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_AMD64) static const bool IsX86 = false; static const bool IsX64 = true; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; #elif defined(TARGET_X86) static const bool IsX86 = true; static const bool IsX64 = false; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; +#elif defined(TARGET_LOONGARCH64) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = true; #else #error Unknown architecture #endif }; -#endif // targetosarch_h \ No newline at end of file +#endif // targetosarch_h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 9a088b2e9a267..927bf7a238ac5 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -44,6 +44,9 @@ function(create_standalone_jit) elseif(TARGETDETAILS_ARCH STREQUAL "s390x") set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) + elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -233,6 +236,15 @@ set( JIT_S390X_SOURCES # Not supported as JIT target ) +set( JIT_LOONGARCH64_SOURCES + codegenloongarch64.cpp + emitloongarch64.cpp + lowerloongarch64.cpp + lsraloongarch64.cpp + targetloongarch64.cpp + unwindloongarch64.cpp +) + # We include the headers here for better experience in IDEs. 
set( JIT_HEADERS ../inc/corinfo.h @@ -379,6 +391,13 @@ set ( JIT_S390X_HEADERS # Not supported as JIT target ) +set( JIT_LOONGARCH64_HEADERS + emitloongarch64.h + emitfmtsloongarch64.h + instrsloongarch64.h + registerloongarch64.h +) + convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) convert_to_absolute_path(JIT_HEADERS ${JIT_HEADERS}) convert_to_absolute_path(JIT_RESOURCES ${JIT_RESOURCES}) @@ -397,6 +416,8 @@ convert_to_absolute_path(JIT_ARMV6_SOURCES ${JIT_ARMV6_SOURCES}) convert_to_absolute_path(JIT_ARMV6_HEADERS ${JIT_ARMV6_HEADERS}) convert_to_absolute_path(JIT_S390X_SOURCES ${JIT_S390X_SOURCES}) convert_to_absolute_path(JIT_S390X_HEADERS ${JIT_S390X_HEADERS}) +convert_to_absolute_path(JIT_LOONGARCH64_SOURCES ${JIT_LOONGARCH64_SOURCES}) +convert_to_absolute_path(JIT_LOONGARCH64_HEADERS ${JIT_LOONGARCH64_HEADERS}) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES}) @@ -416,6 +437,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) elseif(CLR_CMAKE_TARGET_ARCH_S390X) set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) +elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -558,6 +582,10 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) create_standalone_jit(TARGET clrjit_win_x86_${ARCH_HOST_NAME} OS win ARCH x86 DESTINATIONS .) 
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 0c5ae4c0fffa4..f1c1b49b2578b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -235,7 +235,16 @@ class CodeGen final : public CodeGenInterface void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk = nullptr); +#ifdef TARGET_LOONGARCH64 + void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, + instruction ins, + regNumber reg1, + BasicBlock* failBlk = nullptr, + regNumber reg2 = REG_R0); +#else void genCheckOverflow(GenTree* tree); +#endif //------------------------------------------------------------------------- // @@ -251,7 +260,11 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); +#if defined(TARGET_LOONGARCH64) + void genFnPrologCalleeRegArgs(); +#else void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); +#endif void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); @@ -263,7 +276,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) bool genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, @@ -323,6 +336,7 @@ class CodeGen final : public CodeGenInterface void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + #else void genPushCalleeSavedRegisters(); #endif @@ -408,7 +422,25 @@ class CodeGen final : public CodeGenInterface FuncletFrameInfoDsc genFuncletInfo; -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + + // A set of information that is used by funclet prolog and epilog generation. + // It is collected once, before funclet prologs and epilogs are generated, + // and used by all funclet prologs and epilogs, which must all be the same. + struct FuncletFrameInfoDsc + { + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function + // (negative) + int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive) + int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) + int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) + int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. 
+ int fiSpDelta1; // Stack pointer delta 1 (negative) + }; + + FuncletFrameInfoDsc genFuncletInfo; +#endif // TARGET_LOONGARCH64 #if defined(TARGET_XARCH) @@ -598,6 +630,10 @@ class CodeGen final : public CodeGenInterface void genArm64EmitterUnitTests(); #endif +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + void genLoongArch64EmitterUnitTests(); +#endif + #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) void genAmd64EmitterUnitTests(); #endif @@ -1234,8 +1270,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForStoreLclFld(GenTreeLclFld* tree); void genCodeForStoreLclVar(GenTreeLclVar* tree); void genCodeForReturnTrap(GenTreeOp* tree); - void genCodeForJcc(GenTreeCC* tree); - void genCodeForSetcc(GenTreeCC* setcc); void genCodeForStoreInd(GenTreeStoreInd* tree); void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeObj* cpObjNode); @@ -1324,7 +1358,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genTableBasedSwitch(GenTree* tree); void genCodeForArrIndex(GenTreeArrIndex* treeNode); void genCodeForArrOffset(GenTreeArrOffs* treeNode); +#if defined(TARGET_LOONGARCH64) + instruction genGetInsForOper(GenTree* treeNode); +#else instruction genGetInsForOper(genTreeOps oper, var_types type); +#endif bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd); regNumber getCallIndirectionCellReg(const GenTreeCall* call); @@ -1333,7 +1371,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); -#ifdef TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) + // TODO: refactor for LA. + void genCodeForJumpCompare(GenTreeOp* tree); +#endif +#if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); @@ -1349,6 +1391,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode); void genMultiRegStoreToLocal(GenTreeLclVar* lclNode); +#if defined(TARGET_LOONGARCH64) + void genMultiRegCallStoreToLocal(GenTree* treeNode); +#endif + // Codegen for multi-register struct returns. bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD @@ -1364,9 +1410,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 void genReturn(GenTree* treeNode); @@ -1656,6 +1702,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH +#ifndef TARGET_LOONGARCH64 // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions // such as X86's SETcc. A sequence of instructions rather than just a single one is required for // certain floating point conditions. 
@@ -1699,6 +1746,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); + + void genCodeForJcc(GenTreeCC* tree); + void genCodeForSetcc(GenTreeCC* setcc); +#endif // !TARGET_LOONGARCH64 }; // A simple phase that just invokes a method on the codegen instance diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 61bd8639262e1..0eccb2abfc8e5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -127,9 +127,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) /* Assume that we not fully interruptible */ SetInterruptible(false); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) SetHasTailCalls(false); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef DEBUG genInterruptibleUsed = false; genCurDispOffset = (unsigned)-1; @@ -596,7 +596,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_ASSIGN_BYREF: #if defined(TARGET_AMD64) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; #elif defined(TARGET_X86) return RBM_ESI | RBM_EDI | RBM_ECX; @@ -605,7 +605,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH; #endif -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: return RBM_CALLEE_TRASH_WRITEBARRIER; @@ -1171,7 +1171,7 @@ bool CodeGen::genCreateAddrMode( cns += op2->AsIntConCommon()->IconValue(); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (cns == 0) #endif { @@ -1191,7 +1191,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) @@ -1214,7 +1214,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) +#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) default: break; @@ -1235,7 +1235,7 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_ADD: @@ -1294,7 +1294,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 case GT_NOP: @@ -1313,7 +1313,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. 
@@ -1370,7 +1370,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 case GT_NOP: @@ -1593,7 +1593,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi else { // The code to throw the exception will be generated inline, and - // we will jump around it in the normal non-exception case. + // we will jump around it in the normal non-exception case. BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); @@ -1620,6 +1620,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi * have set the flags. Check if the operation caused an overflow. */ +#ifndef TARGET_LOONGARCH64 // inline void CodeGen::genCheckOverflow(GenTree* tree) { @@ -1666,6 +1667,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); } +#endif #if defined(FEATURE_EH_FUNCLETS) @@ -1815,6 +1817,10 @@ void CodeGen::genGenerateMachineCode() { printf("generic ARM64 CPU"); } + else if (compiler->info.genCPU == CPU_LOONGARCH64) + { + printf("generic LOONGARCH64 CPU"); + } else { printf("unknown architecture"); @@ -2019,7 +2025,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -2938,6 +2944,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif + +#ifndef TARGET_LOONGARCH64 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4228,6 +4236,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } +#endif // !TARGET_LOONGARCH64 + #ifdef _PREFAST_ #pragma warning(pop) #endif @@ -4252,6 +4262,11 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; +#ifdef TARGET_LOONGARCH64 + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; +#endif + for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) { /* Is this variable a parameter? */ @@ -4298,8 +4313,38 @@ void CodeGen::genEnregisterIncomingStackArgs() assert(regNum != REG_STK); var_types regType = varDsc->GetStackSlotHomeType(); +#ifdef TARGET_LOONGARCH64 + { + bool FPbased; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); + } + else + { + int baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset); + } + } + } +#else // !TARGET_LOONGARCH64 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); +#endif // !TARGET_LOONGARCH64 + regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); @@ -4600,6 +4645,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + // We will just zero out the entire vector register. This sets it to a double/float zero value + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4635,6 +4683,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4650,6 +4700,8 @@ regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) { #ifdef TARGET_ARM64 return REG_ZR; +#elif defined(TARGET_LOONGARCH64) + return REG_R0; #else // !TARGET_ARM64 if (*pInitRegZeroed == false) { @@ -5057,11 +5109,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed // ARM's emitIns_R_R_I automatically uses the reserved register if necessary. GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#else // !ARM64 !ARM +#elif defined(TARGET_LOONGARCH64) + genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), + compiler->lvaCachedGenericContextArgOffset(), REG_R21); +#else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#endif // !ARM64 !ARM +#endif // !ARM64 !ARM !LOONGARCH64 } /***************************************************************************** @@ -5444,6 +5499,23 @@ void CodeGen::genFinalizeFrame() maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) +#ifdef TARGET_LOONGARCH64 + if (isFramePointerUsed()) + { + // For a FP based frame we have to push/pop the FP register + // + maskCalleeRegsPushed |= RBM_FPBASE; + + // This assert check that we are not using REG_FP + // as both the frame pointer and as a codegen register + // + assert(!regSet.rsRegsModified(RBM_FPBASE)); + } + + // we always push RA. 
See genPushCalleeSavedRegisters + maskCalleeRegsPushed |= RBM_RA; +#endif // TARGET_LOONGARCH64 + compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG @@ -5566,10 +5638,10 @@ void CodeGen::genFnProlog() instGen(INS_nop); instGen(INS_BREAKPOINT); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Avoid asserts in the unwind info because these instructions aren't accounted for. compiler->unwindPadding(); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 } #endif // DEBUG @@ -5975,14 +6047,16 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 + +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + if (!isOSRx64Root) { genPushCalleeSavedRegisters(); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -6013,7 +6087,7 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- CLANG_FORMAT_COMMENT_ANCHOR; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) regMaskTP maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM @@ -6026,7 +6100,7 @@ void CodeGen::genFnProlog() genAllocLclFrame(compiler->compLclFrameSize + extraFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 #ifdef TARGET_AMD64 // For x64 OSR we have to finish saving int callee saves. @@ -6201,6 +6275,13 @@ void CodeGen::genFnProlog() { compiler->lvaUpdateArgsWithInitialReg(); +#if defined(TARGET_LOONGARCH64) + if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) + { + initRegZeroed = false; + genFnPrologCalleeRegArgs(); + } +#else auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) { if (regState->rsCalleeRegArgMaskLiveIn) { @@ -6237,6 +6318,8 @@ void CodeGen::genFnProlog() assignIncomingRegisterArgs(&intRegState); #endif +#endif // TARGET_LOONGARCH64 + // Home the incoming arguments. genEnregisterIncomingStackArgs(); } @@ -6564,7 +6647,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -6672,7 +6755,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass) // unsigned CodeGen::getFirstArgWithStackSlot() { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned baseVarNum = 0; // Iterate over all the lvParam variables in the Lcl var table until we find the first one // that's passed on the stack. @@ -7834,9 +7917,9 @@ void CodeGen::genReturn(GenTree* treeNode) // exit point where it is actually dead. 
genConsumeReg(op1); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genSimpleReturn(treeNode); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 #if defined(TARGET_X86) if (varTypeUsesFloatReg(treeNode)) { @@ -7864,7 +7947,7 @@ void CodeGen::genReturn(GenTree* treeNode) regNumber retReg = varTypeUsesFloatReg(treeNode) ? REG_FLOATRET : REG_INTRET; inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 } } @@ -8058,6 +8141,22 @@ void CodeGen::genStructReturn(GenTree* treeNode) GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); +#ifdef TARGET_LOONGARCH64 + // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }", + // i. e. not include the padding for the first field, and so the general loop below won't work. + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); + if (regCount > 1) + { + assert(regCount == 2); + int offset = genTypeSize(type); + type = retTypeDesc.GetReturnRegType(1); + offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset); + toReg = retTypeDesc.GetABIReturnReg(1); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); + } +#else // !TARGET_LOONGARCH64 int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -8066,6 +8165,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } +#endif // !TARGET_LOONGARCH64 } else { diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f276a492da33d..dbd53ffbad46f 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -112,7 +112,7 @@ class CodeGenInterface private: #if defined(TARGET_XARCH) static const insFlags instInfo[INS_count]; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture @@ -360,7 +360,7 @@ class CodeGenInterface m_cgInterruptible = value; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool GetHasTailCalls() { @@ -374,9 +374,9 @@ class CodeGenInterface private: bool m_cgInterruptible; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // The following will be set to true if we've determined that we need to // generate a full-blown pointer register map for the current method. diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 9c09d423e85ef..83efe5685c3ba 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1211,7 +1211,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(spillType != TYP_UNDEF); // TODO-Cleanup: The following code could probably be further merged and cleaned up. 
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Load local variable from its home location. // Never allow truncating the locals here, otherwise a subsequent // use of the local with a wider type would see the truncated @@ -1223,6 +1223,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) { spillType = lclLoadType; } + +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum())) + { + spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG; + } +#endif #elif defined(TARGET_ARM) // No normalizing for ARM #else @@ -2518,7 +2525,13 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_checkKind = CHECK_NONE; } - m_extendKind = COPY; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need + // the upper 32bits be sign-extended to 64 bits. + m_extendKind = SIGN_EXTEND_INT; +#else + m_extendKind = COPY; +#endif m_extendSrcSize = 4; } #endif @@ -2595,6 +2608,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) } #endif // !defined(TARGET_64BIT) +#ifndef TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJumpTrue: Generate code for a GT_JTRUE node. // @@ -2697,3 +2711,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum()); genProduceReg(setcc); } +#endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp new file mode 100644 index 0000000000000..afe5b0b95d5bd --- /dev/null +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -0,0 +1,9370 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX LOONGARCH64 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +//------------------------------------------------------------------------ +// genInstrWithConstant: we will typically generate one instruction +// +// ins reg1, reg2, imm +// +// However the imm might not fit as a directly encodable immediate, +// when it doesn't fit we generate extra instruction(s) that sets up +// the 'tmpReg' with the proper immediate value. +// +// li tmpReg, imm // li is pseudo instruction here which maybe 2-4 instructions. 
+// ins reg1, reg2, tmpReg +// +// Arguments: +// ins - instruction +// attr - operation size and GC attribute +// reg1, reg2 - first and second register operands +// imm - immediate value (third operand when it fits) +// tmpReg - temp register to use when the 'imm' doesn't fit. Can be REG_NA +// if caller knows for certain the constant will fit. +// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes. +// Default: false. +// +// Return Value: +// returns true if the immediate was small enough to be encoded inside instruction. If not, +// returns false meaning the immediate was too large and tmpReg was used and modified. +// +bool CodeGen::genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion /* = false */) +{ + emitAttr size = EA_SIZE(attr); + + // reg1 is usually a dest register + // reg2 is always source register + assert(tmpReg != reg2); // tmpReg can not match any source register + +#ifdef DEBUG + switch (ins) + { + case INS_addi_d: + + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + case INS_st_d: + case INS_fst_d: + + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_fld_s: + case INS_ld_d: + case INS_fld_d: + break; + + default: + assert(!"Unexpected instruction in genInstrWithConstant"); + break; + } +#endif + bool immFitsInIns = emitter::isValidSimm12(imm); + + if (immFitsInIns) + { + // generate a single instruction that encodes the immediate directly + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit + assert(tmpReg != REG_NA); + + // generate two or more instructions + + // first we load the immediate into tmpReg + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, tmpReg, imm); + regSet.verifyRegUsed(tmpReg); + + // when we are in an unwind code region + // we record the extra instructions using unwindPadding() + if (inUnwindRegion) + { + compiler->unwindPadding(); + } + + if (ins == INS_addi_d) + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, reg1, reg2, tmpReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, tmpReg, reg2, tmpReg); + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, tmpReg, 0); + } + } + return immFitsInIns; +} + +//------------------------------------------------------------------------ +// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog +// or the epilog. The unwind codes for the generated instructions are produced. An available temporary +// register is required to be specified, in case the constant is too large to encode in an "daddu" +// instruction (or "dsubu" instruction if we choose to use one), such that we need to load the constant +// into a register first, before using it. +// +// Arguments: +// spDelta - the value to add to SP (can be negative) +// tmpReg - an available temporary register +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. +// +// Return Value: +// None. 
+ +void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +{ + // Even though INS_addi_d is specified here, the encoder will choose either + // an INS_add_d or an INS_addi_d and encode the immediate as a positive value + // + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) + { + if (pTmpRegIsZero != nullptr) + { + *pTmpRegIsZero = false; + } + } + + if (reportUnwindData) + { + // spDelta is negative in the prolog, positive in the epilog, + // but we always tell the unwind codes the positive value. + ssize_t spDeltaAbs = abs(spDelta); + unsigned unwindSpDelta = (unsigned)spDeltaAbs; + assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned + + compiler->unwindAllocStack(unwindSpDelta); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet +// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. +// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to save. +// reg2 - Second register of pair to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fst_d; + } + + if (spDelta != 0) + { + // generate addi.d SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + + assert((spDelta + spOffset + 16) <= 0); + + assert(spOffset <= 2031); // 2047-16 + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); +} + +//------------------------------------------------------------------------ +// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or +// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), +// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not +// below it, which we currently don't support. 
This restriction could be loosened if the callers change to handle it +// (and this function changes to support using pre-indexed SD addressing). The caller must ensure that we can use the +// SD instruction, and that spOffset will be in the legal range for that instruction. +// +// Arguments: +// reg1 - Register to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fst_d; + } + + if (spDelta != 0) + { + // generate daddiu SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); +} + +//------------------------------------------------------------------------ +// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. +// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to restore. +// reg2 - Second register of pair to restore. +// spOffset - The offset from SP to load reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld_d; + } + + if (spDelta != 0) + { + assert(!useSaveNextPair); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. +// +// Arguments: +// reg1 - Register to restore. +// spOffset - The offset from SP to restore reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld_d; + } + + if (spDelta != 0) + { + // ld reg1, offset(SP) + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate add SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask. +// The first register pair will contain the lowest register. Register pairs will combine neighbor +// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with +// odd number of regs) then the second element of that RegPair will be REG_NA. +// +// Arguments: +// regsMask - a mask of registers for prolog/epilog generation; +// regStack - a regStack instance to build the stack in, used to save temp copyings. +// +// Return value: +// no return value; the regStack argument is modified. 
+//
+// static
+void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
+{
+    assert(regStack != nullptr);
+    assert(regStack->Height() == 0);
+
+    unsigned regsCount = genCountBits(regsMask);
+
+    while (regsMask != RBM_NONE)
+    {
+        regMaskTP reg1Mask = genFindLowestBit(regsMask);
+        regNumber reg1     = genRegNumFromMask(reg1Mask);
+        regsMask &= ~reg1Mask;
+        regsCount -= 1;
+
+        bool isPairSave = false;
+        if (regsCount > 0)
+        {
+            regMaskTP reg2Mask = genFindLowestBit(regsMask);
+            regNumber reg2     = genRegNumFromMask(reg2Mask);
+            if (reg2 == REG_NEXT(reg1))
+            {
+                // The JIT doesn't allow saving pair (S7,FP), even though the
+                // save_regp register pair unwind code specification allows it.
+                // The JIT always saves (FP,RA) as a pair, and uses the save_fpra
+                // unwind code. This only comes up in stress mode scenarios
+                // where callee-saved registers are not allocated completely
+                // from lowest-to-highest, without gaps.
+                if (reg1 != REG_FP)
+                {
+                    // Both registers must have the same type to be saved as pair.
+                    if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+                    {
+                        isPairSave = true;
+
+                        regsMask &= ~reg2Mask;
+                        regsCount -= 1;
+
+                        regStack->Push(RegPair(reg1, reg2));
+                    }
+                }
+            }
+        }
+
+        if (!isPairSave)
+        {
+            regStack->Push(RegPair(reg1));
+        }
+    }
+    assert(regsCount == 0 && regsMask == RBM_NONE);
+
+    genSetUseSaveNextPairs(regStack);
+}
+
+//------------------------------------------------------------------------
+// genSetUseSaveNextPairs: Set useSaveNextPair for each RegPair on the stack which unwind info can be encoded as
+// save_next code.
+//
+// Arguments:
+//   regStack - a regStack instance to set useSaveNextPair.
+//
+// Notes:
+// We can use save_next for RegPair(N, N+1) only when we have sequence like (N-2, N-1), (N, N+1).
+// In this case in the prolog save_next for (N, N+1) refers to save_pair(N-2, N-1);
+// in the epilog the unwinder will search for the first save_pair (N-2, N-1)
+// and then go back to the first save_next (N, N+1) to restore it first.
+//
+// static
+void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
+{
+    for (int i = 1; i < regStack->Height(); ++i)
+    {
+        RegPair& curr = regStack->BottomRef(i);
+        RegPair  prev = regStack->Bottom(i - 1);
+
+        if (prev.reg2 == REG_NA || curr.reg2 == REG_NA)
+        {
+            continue;
+        }
+
+        if (REG_NEXT(prev.reg2) != curr.reg1)
+        {
+            continue;
+        }
+
+        if (genIsValidFloatReg(prev.reg2) != genIsValidFloatReg(curr.reg1))
+        {
+            // It is possible to support changing of the last int pair with the first float pair,
+            // but it is very rare case and it would require superfluous changes in the unwinder.
+            continue;
+        }
+        curr.useSaveNextPair = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation.
+//
+// Return value:
+//   stack slot size in bytes.
+//
+// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
+//
+// static
+int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
+{
+    assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else.
+
+    static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+    return REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog generation;
+//   spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        RegPair regPair = regStack.Bottom(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            // We can use two SD instructions.
+            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21,
+                                 nullptr);
+
+            spOffset += 2 * slotSize;
+        }
+        else
+        {
+            // No register pair; we use an SD instruction.
+            genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr);
+            spOffset += slotSize;
+        }
+
+        spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+    }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (SD) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8 and the stack pointer
+// adjustment must be done by a SUB, and not folded into a pre-indexed store.
+//
+// Arguments:
+//   regsToSaveMask          - The mask of callee-saved registers to save. If empty, this function does nothing.
+//   lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+//                             if non-zero spDelta, then this is the offset of the first save *after* that
+//                             SP adjustment.
+//   spDelta                 - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                             zero).
+//
+// Notes:
+//   The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers.
+//
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+    assert(spDelta <= 0);
+
+    unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+    if (regsToSaveCount == 0)
+    {
+        if (spDelta != 0)
+        {
+            // Currently this is the case for varargs only
+            // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+            genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true);
+        }
+        return;
+    }
+
+    assert((spDelta % 16) == 0);
+
+    assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED));
+
+    // Save integer registers at higher addresses than floating-point registers.
+
+    regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+    regMaskTP maskSaveRegsInt   = regsToSaveMask & ~maskSaveRegsFloat;
+
+    if (maskSaveRegsFloat != RBM_NONE)
+    {
+        genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset);
+        spDelta = 0;
+        lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES;
+    }
+
+    if (maskSaveRegsInt != RBM_NONE)
+    {
+        genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
+        // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
+    }
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegisterGroup: Restores the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for epilog generation;
+//   spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    int stackDelta = 0;
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        bool lastRestoreInTheGroup = (i == regStack.Height() - 1);
+        bool updateStackDelta      = lastRestoreInTheGroup && (spDelta != 0);
+        if (updateStackDelta)
+        {
+            // Update stack delta only if it is the last restore (the first save).
+            assert(stackDelta == 0);
+            stackDelta = spDelta;
+        }
+
+        RegPair regPair = regStack.Top(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            spOffset -= 2 * slotSize;
+
+            genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_R21,
+                                    nullptr);
+        }
+        else
+        {
+            spOffset -= slotSize;
+            genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_R21, nullptr);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+//   regsToRestoreMask       - The mask of callee-saved registers to restore. If empty, this function does nothing.
+//   lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+//   spDelta                 - If non-zero, the amount to add to SP after the register restores (must be positive or
+//                             zero).
+// +// Here's an example restore sequence: +// ld s7, 88(sp) +// ld s6, 80(sp) +// ld s5, 72(sp) +// ld s4, 64(sp) +// ld s3, 56(sp) +// ld s2, 48(sp) +// ld s1, 40(sp) +// ld s0, 32(sp) +// +// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and +// the last restore adjusts SP by the specified amount. For example: +// ld s7, 56(sp) +// ld s6, 48(sp) +// ld s5, 40(sp) +// ld s4, 32(sp) +// ld s3, 24(sp) +// ld s2, 16(sp) +// ld s1, 88(sp) +// ld s0, 80(sp) +// +// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when +// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. +// +// Return Value: +// None. + +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta >= 0); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + if (regsToRestoreCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + return; + } + + assert((spDelta % 16) == 0); + + // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + + // Point past the end, to start. We predecrement to find the offset to load from. + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; + + // Restore in the opposite order of saving. + + if (maskRestoreRegsInt != RBM_NONE) + { + int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; + } + + if (maskRestoreRegsFloat != RBM_NONE) + { + // If there is any spDelta, it must be used here. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + // No need to update spOffset since it's not used after this. + } +} + +// clang-format off +/***************************************************************************** + * + * Generates code for an EH funclet prolog. + * + * Funclets have the following incoming arguments: + * + * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) + * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * finally/fault: none + * + * Funclets set the following registers on exit: + * + * catch: v0 = the address at which execution should resume (see BBJ_EHCATCHRET) + * filter: v0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) + * finally/fault: none + * + * The LOONGARCH64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, + * including everything; #outsz is outgoing argument space. 
#framesz must be a multiple of 16): + * + * Frame type 1: + * For #framesz <= 32760 and FP/RA at bottom: + * daddiu sp,sp,-#framesz ; establish the frame (predecrement by #framesz), save FP/RA + * sd fp,#outsz(sp) + * sd ra,#outsz+8(sp) + * sd s0,#xxx-8(sp) ; save callee-saved registers, as necessary + * sd s1,#xxx(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 2: + * For #framesz <= 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz ; establish the frame + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 3: + * For #framesz > 32760 and FP/RA at bottom: + * ; for funclet, #framesz-#outsz will be less than 32760. + * + * daddiu sp,sp,-(#framesz-#FPRA_delta) ; note maybe 16byte-alignment. + * sd fp, pad(sp) ; pad is depended on stack-16byte-alignment.. + * sd ra, pad+8(sp) + * sd s0,#xxx(sp) ; save callee-saved registers, as necessary, + * sd s1,#xxx+8(sp) + * daddiu sp,sp,-#outsz ; create space for outgoing argument space, mabye 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 4: + * For #framesz > 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz+PSP_offset ; establish the frame, maybe 16byte-alignment. 
+ * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * daddiu sp,sp,-#PSP_offset ; establish the frame, maybe 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * + * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, + * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack + * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 224 bytes: + * + * FP,RA registers + * 8 int callee-saved register s0-s7 + * 8 float callee-saved registers f24-f31 + * 8 saved integer argument registers a0-a7, if varargs function + * 1 PSP slot + * 1 alignment slot, future maybe add gp + * == 28 slots * 8 bytes = 224 bytes. + * + * The outgoing argument size, however, can be very large, if we call a function that takes a large number of + * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main + * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of + * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before + * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset + * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument + * space. Both changes to SP might need to add alignment padding. + * + * In addition to the above "standard" frames, we also need to support a frame where the saved FP/RA are at the + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/RA + * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/RA + * changed, but where the alignment padding is placed has also changed. + * + * + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP + * as in the main function. + * + * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we + * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * + * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. 
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. + * + * if (this is a filter funclet) + * { + * // a1 on entry to a filter funclet is CallerSP of the containing function: + * // either the main function, or the funclet for a handler that this filter is dynamically nested within. + * // Note that a filter can be dynamically nested within a funclet even if it is not statically within + * // a funclet. Consider: + * // + * // try { + * // try { + * // throw new Exception(); + * // } catch(Exception) { + * // throw new Exception(); // The exception thrown here ... + * // } + * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack + * // } filter-handler { + * // } + * // + * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will + * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always + * // create a main function PSP for any function with a filter. + * + * ld a1, CallerSP_to_PSP_slot_delta(a1) ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) + * sd a1, SP_to_PSP_slot_delta(sp) ; store the PSP + * daddiu fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer + * } + * else + * { + * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. + * // TODO-LOONGARCH64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. + * + * daddiu a3, fp, Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? + * sd a3, SP_to_PSP_slot_delta(sp) ; store the PSP + * } + * + * An example epilog sequence is then: + * + * daddiu sp,sp,#outsz ; if any outgoing argument space + * ... ; restore callee-saved registers + * ld s0,#xxx-8(sp) + * ld s1,#xxx(sp) + * ld fp,#framesz-8(sp) + * ld ra,#framesz(sp) + * daddiu sp,sp,#framesz + * jr ra + * + */ +// clang-format on + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletProlog()\n"); +#endif + + assert(block != NULL); + assert(block->bbFlags & BBF_FUNCLET_BEG); + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + gcInfo.gcResetForBB(); + + compiler->unwindBegProlog(); + + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
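+ // (Asserted below.) In both frame types handled here, FP and RA are stored explicitly
+ // relative to SP and then removed from the save mask before the remaining callee-saved
+ // registers are saved via genSaveCalleeSavedRegistersHelp.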
+ assert((maskSaveRegsInt & RBM_RA) != 0); + assert((maskSaveRegsInt & RBM_FP) != 0); + + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; + + regMaskTP maskArgRegsLiveIn; + if (isFilter) + { + maskArgRegsLiveIn = RBM_A0 | RBM_A1; + } + else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) + { + maskArgRegsLiveIn = RBM_NONE; + } + else + { + maskArgRegsLiveIn = RBM_A0; + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } +#endif + + int offset = 0; + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= -2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + offset = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } + + // This is the end of the OS-reported prolog for purposes of unwinding + compiler->unwindEndProlog(); + + // If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet frame. 
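+ // For a filter funclet the sequence below is, in effect (offsets are symbolic):
+ //   ld.d   a1, a1, CallerSP_to_PSP_slot_delta     ; a1 = CallerSP of the main function
+ //   st.d   a1, sp, SP_to_PSP_slot_delta           ; publish it as this funclet's PSP slot
+ //   addi.d fp, a1, Function_CallerSP_to_FP_delta  ; re-establish the frame pointer
+ // For a non-filter funclet, CallerSP is recomputed from the already-established FP instead.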
+ if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (isFilter) + { + // This is the first block of a filter + // Note that register a1 = CallerSP of the containing function + // A1 is overwritten by the first Load (new callerSP) + // A2 is scratch when we have a large constant offset + + // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or + // function) + genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, + REG_A2, false); + regSet.verifyRegUsed(REG_A1); + + // Store the PSP value (aka CallerSP) + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + + // re-establish the frame pointer + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, + genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + } + else // This is a non-filter funclet + { + // A3 is scratch, A2 can also become scratch. + + // compute the CallerSP, given the frame pointer. a3 is scratch? + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE, + -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + regSet.verifyRegUsed(REG_A3); + + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + } + } +} + +/***************************************************************************** + * + * Generates code for an EH funclet epilog. + */ + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFuncletEpilog()\n"); + } +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + bool unwindStarted = false; + int frameSize = genFuncletInfo.fiSpDelta1; + + if (!unwindStarted) + { + // We can delay this until we know we'll generate an unwindable instruction, if necessary. + compiler->unwindBegEpilog(); + unwindStarted = true; + } + + regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
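+ // The epilog below reverses the prolog: the helper restores the other callee-saved
+ // registers, RA and FP are then reloaded explicitly, and the SP adjustment(s) undo the
+ // frame establishment (split into two steps for frame type 2, matching its prolog).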
+ assert((maskRestoreRegsInt & RBM_RA) != 0); + assert((maskRestoreRegsInt & RBM_FP) != 0); + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } +#endif + + regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; + + assert(frameSize < 0); + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + // first, generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + assert(offset2 < 2040); // can amend. + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + // second, generate daddiu SP,SP,imm for remaine space. + genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } + GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + + compiler->unwindEndEpilog(); +} + +/***************************************************************************** + * + * Capture the information used to generate the funclet prologs and epilogs. + * Note that all funclet prologs are identical, and all funclet epilogs are + * identical (per type: filters are identical, and non-filters are identical). + * Thus, we compute the data used for these just once. + * + * See genFuncletProlog() for more information about the prolog/epilog sequences. + */ + +void CodeGen::genCaptureFuncletPrologEpilogInfo() +{ + if (!compiler->ehAnyFunclets()) + { + return; + } + + assert(isFramePointerUsed()); + + // The frame size and offsets must be finalized + assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); + + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); + + regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; + assert((rsMaskSaveRegs & RBM_RA) != 0); + assert((rsMaskSaveRegs & RBM_FP) != 0); + + unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 
8 : 0; + + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); + + unsigned saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; + + unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + + assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + + unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); + + int SP_to_FPRA_save_delta = compiler->lvaOutgoingArgSpaceSize; + + unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); + + unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; + assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); + + if (maxFuncletFrameSizeAligned <= (2048 - 8)) + { + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + else + { + unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; + assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + + genFuncletInfo.fiFrameType = 2; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + + int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + + /* Now save it for future use */ + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; + + genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + +#ifdef DEBUG + if (verbose) + { + printf("\n"); + printf("Funclet prolog / epilog info\n"); + printf(" Save regs: "); + dspRegMask(genFuncletInfo.fiSaveRegs); + printf("\n"); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta); + printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); + printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } + } + } + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0); +#endif // DEBUG +} + +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnEpilog()\n"); + } +#endif // DEBUG + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + { + printf("\n__epilog:\n"); + } + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", 
VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && (lastNode->gtOper == GT_JMP)) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. + noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + // TODO-LOONGARCH64-CQ: using B/BL for optimization. + case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. + callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. 
+ */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + genCallInstruction(jmpNode->AsCall()); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + +void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (compiler->lvaPSPSym == BAD_VAR_NUM) + { + return; + } + + noway_assert(isFramePointerUsed()); // We need an explicit frame pointer + + int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); + + // We will just use the initReg since it is an available register + // and we are probably done using it anyway... + regNumber regTmp = initReg; + *pInitRegZeroed = false; + + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, REG_R21, false); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); +} + +//----------------------------------------------------------------------------- +// genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case +// `genUseBlockInit` is set. +// +// Arguments: +// untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init +// code will end initializing memory (not inclusive). +// untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will +// start zero initializing memory. +// initReg - A scratch register (that gets set to zero on some platforms). +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, +// 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. +// +void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) +{ + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskTP regMask; + + regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + // see: src/jit/registerloongarch64.h + availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are + // currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for + // a large constant. 
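+ // rAddr is set to frame-pointer + untrLclLo below: a single addi.d when the offset fits
+ // in a signed 12-bit immediate, otherwise the offset is materialized into initReg and
+ // added with add.d.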
+ + rAddr = initReg; + *pInitRegZeroed = false; + + // rAddr is not a live incoming argument reg + assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); + assert(untrLclLo % 4 == 0); + + if (emitter::isValidSimm12(untrLclLo)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); + } + else + { + // Load immediate into the InitReg register + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); + *pInitRegZeroed = false; + } + + bool useLoop = false; + unsigned uCntBytes = untrLclHi - untrLclLo; + assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. + unsigned int padding = untrLclLo & 0x7; + + if (padding) + { + assert(padding == 4); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, 0); + uCntBytes -= 4; + } + + unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + + // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. + // When it is 10 or greater, we will emit a loop containing a sd instruction. + // In both of these cases the sd instruction will write two zeros to memory + // and we will use a single str instruction at the end whenever we have an odd count. + if (uCntSlots >= 10) + useLoop = true; + + if (useLoop) + { + // We pick the next lowest register number for rCnt + noway_assert(availMask != RBM_NONE); + regMask = genFindLowestBit(availMask); + rCnt = genRegNumFromMask(regMask); + availMask &= ~regMask; + + noway_assert(uCntSlots >= 2); + assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + // argument reg + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + // TODO-LOONGARCH64: maybe optimize further + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); + + // bne rCnt, zero, -4 * 4 + ssize_t imm = -16; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + + uCntBytes %= REGSIZE_BYTES * 2; + } + else + { + while (uCntBytes >= REGSIZE_BYTES * 2) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + uCntBytes -= REGSIZE_BYTES * 2; + padding = 0; + } + } + + if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + { + if ((uCntBytes - REGSIZE_BYTES) == 0) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + } + uCntBytes -= REGSIZE_BYTES; + } + if (uCntBytes > 0) + { + assert(uCntBytes == sizeof(int)); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, padding); + uCntBytes -= sizeof(int); + } + noway_assert(uCntBytes == 0); +} + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX End Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ + // Generate a call to the finally, like this: + // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used + // bl finally-funclet + // b finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); + } + GetEmitter()->emitIns_J(INS_bl, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. + + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_break); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-LOONGARCH64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + GetEmitter()->emitEnableGC(); + } + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); +} + +// move an immediate value into an integer register +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + emitter* emit = GetEmitter(); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs. 
+ } + + if (EA_IS_RELOC(size)) + { + assert(genIsValidIntReg(reg)); + emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC + } + else + { + emit->emitIns_I_la(size, reg, imm); + } + + regSet.verifyRegUsed(reg); +} + +/*********************************************************************************** + * + * Generate code to set a register 'targetReg' of type 'targetType' to the constant + * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call + * genProduceReg() on the target register. + */ +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t cnsVal = con->IconValue(); + + // if (con->ImmedValNeedsReloc(compiler)) + if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) + { + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + assert(compiler->opts.compReloc); + GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + regSet.verifyRegUsed(targetReg); + } + else + { + genSetRegToIcon(targetReg, cnsVal, targetType); + } + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitActualTypeSize(tree); + double constValue = tree->AsDblCon()->gtDconVal; + + // Make sure we use "daddiu reg, zero, 0x00" only for positive zero (0.0) + // and not for negative zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0.0 + // We will just zero out the entire vector register for both float and double + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); + } + else + { + // Get a temp integer register to compute long address. + // regNumber addrReg = tree->GetSingleTempReg(); + + // We must load the FP constant from the constant pool + // Emit a data section constant for the float or double constant. + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + + // Load the FP constant. + assert(targetReg >= REG_F0); + + instruction ins = size == EA_4BYTE ? INS_fld_s : INS_fld_d; + + // Compute the address of the FP constant and load the data. + emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +// Produce code for a GT_INC_SATURATE node. +void CodeGen::genCodeForIncSaturate(GenTree* tree) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + genConsumeOperands(treeNode); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + assert(!varTypeIsFloating(targetType)); + + // op1 and op2 can only be a reg at present, will amend in the future. + assert(!op1->isContained()); + assert(!op2->isContained()); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + if (EA_SIZE(attr) == EA_8BYTE) + { + instruction ins = isUnsigned ? 
INS_mulh_du : INS_mulh_d; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + else + { + assert(EA_SIZE(attr) == EA_4BYTE); + instruction ins = isUnsigned ? INS_mulh_wu : INS_mulh_w; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + + genProduceReg(treeNode); +} + +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR +// This method is expected to have called genConsumeOperands() before calling it. +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + instruction ins = genGetInsForOper(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) + { + var_types targetType = varDsc->GetRegisterType(tree); + // if (tree->gtFlags & GTF_UNSIGNED) + // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. + emitter* emit = GetEmitter(); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + // record the offset + unsigned offset = tree->GetLclOffs(); + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
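+ // (The assert that follows checks this for locals that are normalized on store.)
+ // The remainder of this function consumes the source operand, selects the register to
+ // store from, emits the stack store, and marks the local as living on the stack (REG_STK).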
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else if (data->isContained()) + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); + + genUpdateLife(tree); + + varDsc->SetRegNum(REG_STK); +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// lclNode - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) +{ + GenTree* data = lclNode->gtOp1; + + // var = call, where call returns a multi-reg return value + // case is handled separately. + if (data->gtSkipReloadOrCopy()->IsMultiRegNode()) + { + genMultiRegCallStoreToLocal(lclNode); + return; + } + + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); + + unsigned varNum = lclNode->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + var_types targetType = varDsc->GetRegisterType(lclNode); + + if (lclNode->IsMultiReg()) + { + regNumber operandReg = genConsumeReg(data); + unsigned int regCount = varDsc->lvFieldCnt; + for (unsigned i = 0; i < regCount; ++i) + { + NYI("unimplemented on LOONGARCH64 yet"); + regNumber varReg = lclNode->GetRegByIndex(i); + assert(varReg != REG_NA); + unsigned fieldLclNum = varDsc->lvFieldLclStart + i; + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum); + assert(fieldVarDsc->TypeGet() == TYP_FLOAT); + GetEmitter()->emitIns_R_R_I(INS_st_d, emitTypeSize(TYP_FLOAT), varReg, operandReg, i); + } + genProduceReg(lclNode); + } + else + { +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (lclNode->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(lclNode); + return; + } +#endif // FEATURE_SIMD + + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContained()) + { + // This is only possible for a zero-init or bitcast. + const bool zeroInit = data->IsIntegralConst(0); + // TODO-LOONGARCH64-CQ: not supporting SIMD. 
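+ // Three contained cases are handled below: an integral zero (stored from R0), another
+ // integral constant (materialized into R21 first), and a BITCAST (stored from its
+ // source register).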
+ assert(!varTypeIsSIMD(targetType)); + + if (zeroInit) + { + dataReg = REG_R0; + } + else if (data->IsIntegralConst()) + { + ssize_t imm = data->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + dataReg = REG_R21; + } + else + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(lclNode); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + emitAttr attr = emitActualTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + + genUpdateLife(lclNode); + + varDsc->SetRegNum(REG_STK); + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); + } + genProduceReg(lclNode); + } + } +} + +//------------------------------------------------------------------------ +// genSimpleReturn: Generates code for simple return statement for loongarch64. +// +// Note: treeNode's and op1's registers are already consumed. +// +// Arguments: +// treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type +// +// Return Value: +// None +// +void CodeGen::genSimpleReturn(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + GenTree* op1 = treeNode->gtGetOp1(); + var_types targetType = treeNode->TypeGet(); + + assert(targetType != TYP_STRUCT); + assert(targetType != TYP_VOID); + + regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? 
REG_FLOATRET : REG_INTRET; + + bool movRequired = (op1->GetRegNum() != retReg); + + if (!movRequired) + { + if (op1->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + bool isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate(); + if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) + { + // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR + + unsigned lclNum = lcl->GetLclNum(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + var_types op1Type = genActualType(op1->TypeGet()); + var_types lclType = genActualType(varDsc->TypeGet()); + + if (genTypeSize(op1Type) < genTypeSize(lclType)) + { + movRequired = true; + } + } + } + } + if (movRequired) + { + emitAttr attr = emitActualTypeSize(targetType); + if (varTypeUsesFloatArgReg(treeNode)) + { + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum()); + } + } + else + { + if (attr == EA_4BYTE) + { + if (treeNode->gtFlags & GTF_UNSIGNED) + { + GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0); + } + } + else + GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, op1->GetRegNum(), 0); + } + } +} + +/*********************************************************************************************** + * Generate code for localloc + */ +void CodeGen::genLclHeap(GenTree* tree) +{ + assert(tree->OperGet() == GT_LCLHEAP); + assert(compiler->compLocallocUsed); + + emitter* emit = GetEmitter(); + GenTree* size = tree->AsOp()->gtOp1; + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + + regNumber targetReg = tree->GetRegNum(); + regNumber regCnt = REG_NA; + regNumber pspSymReg = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; // can optimize for loongarch. + unsigned stackAdjustment = 0; + const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1; + target_ssize_t lastTouchDelta = + ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed. + + noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + // compute the amount of memory to allocate to properly STACK_ALIGN. + size_t amount = 0; + if (size->IsCnsIntOrI()) + { + // If size is a constant, then it must be contained. + assert(size->isContained()); + + // If amount is zero then return null in targetReg + amount = size->AsIntCon()->gtIconVal; + if (amount == 0) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); + goto BAILOUT; + } + + // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN + amount = AlignUp(amount, STACK_ALIGN); + } + else + { + // If 0 bail out by returning null in targetReg + genConsumeRegAndCopy(size, targetReg); + endLabel = genCreateTempLabel(); + emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); + + // Compute the size of the block to allocate and perform alignment. + // If compInitMem=true, we can reuse targetReg as regcnt, + // since we don't need any internal registers. 
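+ // After regCnt is selected below, the requested size is rounded up to STACK_ALIGN using
+ // (regCnt + (STACK_ALIGN - 1)) & ~(STACK_ALIGN - 1), with the mask materialized into R21.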
+ if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + if (regCnt != targetReg) + { + emit->emitIns_R_R_I(INS_ori, easz, regCnt, targetReg, 0); + } + } + + // Align to STACK_ALIGN + // regCnt will be the total number of bytes to localloc + inst_RV_IV(INS_addi_d, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); + + assert(regCnt != REG_R21); + ssize_t imm2 = ~(STACK_ALIGN - 1); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, imm2); + emit->emitIns_R_R_R(INS_and, emitActualTypeSize(type), regCnt, regCnt, REG_R21); + } + + // If we have an outgoing arg area then we must adjust the SP by popping off the + // outgoing arg area. We will restore it right before we return from this method. + // + // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following + // are the cases that need to be handled: + // i) Method has out-going arg area. + // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). + // Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc + // space. + // ii) Method has no out-going arg area. + // Nothing to pop off from the stack. + if (compiler->lvaOutgoingArgSpaceSize > 0) + { + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // // aligned + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, rsGetRsvdReg()); + stackAdjustment += outgoingArgSpaceAligned; + } + + if (size->IsCnsIntOrI()) + { + // We should reach here only for non-zero, constant size allocations. + assert(amount > 0); + ssize_t imm = -16; + + // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes. + static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); + assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time + size_t stpCount = amount / (REGSIZE_BYTES * 2); + if (compiler->info.compInitMem) + { + if (stpCount <= 4) + { + imm = -16 * stpCount; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + imm = -imm; + while (stpCount != 0) + { + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + stpCount -= 1; + } + + lastTouchDelta = 0; + + goto ALLOC_DONE; + } + } + else if (amount < compiler->eeGetPageSize()) // must be < not <= + { + // Since the size is less than a page, simply adjust the SP value. + // The SP might already be in the guard page, so we must touch it BEFORE + // the alloc, not after. + + // ld_w r0, 0(SP) + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0); + + lastTouchDelta = amount; + imm = -(ssize_t)amount; + if (emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, rsGetRsvdReg(), amount); + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rsGetRsvdReg()); + } + + goto ALLOC_DONE; + } + + // else, "mov regCnt, amount" + // If compInitMem=true, we can reuse targetReg as regcnt. + // Since size is a constant, regCnt is not yet initialized. 
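+ // genSetRegToIcon below loads the constant size into regCnt; TYP_INT is used when the
+ // value fits in an unsigned 32-bit quantity, TYP_LONG otherwise.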
+ assert(regCnt == REG_NA); + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + } + genSetRegToIcon(regCnt, amount, ((unsigned int)amount == amount) ? TYP_INT : TYP_LONG); + } + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the total number of bytes to locAlloc. + // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid + // by tickling the pages, we will just push 0's on the stack. + // + // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 + // and localloc size is a multiple of STACK_ALIGN. + + // Loop: + ssize_t imm = -16; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); + + // If not done, loop + // Note that regCnt is the number of bytes to stack allocate. + // Therefore we need to subtract 16 from regcnt here. + assert(genIsValidIntReg(regCnt)); + + emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16); + + assert(imm == (-4 << 2)); // goto loop. + emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); + + lastTouchDelta = 0; + } + else + { + // At this point 'regCnt' is set to the total number of bytes to localloc. + // + // We don't need to zero out the allocated memory. However, we do have + // to tickle the pages to ensure that SP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case SP is on the last byte of the guard page. Thus you must + // touch SP-0 first not SP-0x1000. + // + // This is similar to the prolog code in CodeGen::genAllocLclFrame(). + // + // Note that we go through a few hoops so that SP never points to + // illegal pages at any time during the tickling process. + // + // sltu R21, SP, regCnt + // sub_d regCnt, SP, regCnt // regCnt now holds ultimate SP + // masknez regCnt, regCnt, R21 // Overflow, pick lowest possible value + // + // lu12i_w regTmp, eeGetPageSize()>>12 + // Loop: + // ld_w r0, 0(SP) // tickle the page - read from the page + // sub_d R21, SP, regTmp // decrement SP by eeGetPageSize() + // bltu R21, regCnt, Done + // sub_d SP, SP,regTmp + // b Loop + // + // Done: + // mov SP, regCnt + // + + // Setup the regTmp + regNumber regTmp = tree->GetSingleTempReg(); + + assert(regCnt != REG_R21); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, REG_R21, REG_SPBASE, regCnt); + + //// dsubu regCnt, SP, regCnt // regCnt now holds ultimate SP + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); + + // Overflow, set regCnt to lowest possible value + emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21); + + assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12)); + emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12); + + // genDefineTempLabel(loop); + + // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0); + + // decrement SP by eeGetPageSize() + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_R21, REG_SPBASE, regTmp); + + assert(regTmp != REG_R21); + + ssize_t imm = 3 << 2; // goto done. 
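+ // 3 << 2 == 12 bytes forward from the bltu: skip the SP decrement and the backward
+ // branch, landing on the final "ori SP, regCnt, 0" that corresponds to the Done label
+ // in the pseudo-code above.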
+ emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm); + + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); + + imm = -4 << 2; + // Jump to loop and tickle new stack address + emit->emitIns_I(INS_b, EA_PTRSIZE, imm); + + // Done with stack tickle loop + // genDefineTempLabel(done); + + // Now just move the final value to SP + emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); + + // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, + // we're going to assume the worst and probe. + } + +ALLOC_DONE: + // Re-adjust SP to allocate outgoing arg area. We must probe this adjustment. + if (stackAdjustment != 0) + { + assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned + assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0)); + + const regNumber tmpReg = rsGetRsvdReg(); + + if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || + (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > + compiler->eeGetPageSize())) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tmpReg); + } + else + { + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, tmpReg); + } + + // Return the stackalloc'ed address in result register. + // TargetReg = SP + stackAdjustment. + // + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tmpReg); + } + else // stackAdjustment == 0 + { + // Move the final value of SP to targetReg + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0); + } + +BAILOUT: + if (endLabel != nullptr) + genDefineTempLabel(endLabel); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForNegNot(GenTree* tree) +{ + assert(tree->OperIs(GT_NEG, GT_NOT)); + + var_types targetType = tree->TypeGet(); + + assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); + + regNumber targetReg = tree->GetRegNum(); + instruction ins = genGetInsForOper(tree); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTree* operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + emitAttr attr = emitActualTypeSize(tree); + GetEmitter()->emitIns_R_R(ins, attr, targetReg, operandReg); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForBswap(GenTree* tree) +{ + NYI_LOONGARCH64("genCodeForBswap unimpleement yet"); +} + +//------------------------------------------------------------------------ +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. +// (1) float/double MOD is morphed into a helper call by front-end. 
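+// (2) For the integer forms, the divide-by-zero check (and, for signed DIV/MOD, the
+//     MinInt / -1 overflow check) is emitted inline below.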
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); + + var_types targetType = tree->TypeGet(); + emitter* emit = GetEmitter(); + + genConsumeOperands(tree); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + assert(varTypeIsFloating(tree->gtOp1)); + assert(varTypeIsFloating(tree->gtOp2)); + assert(tree->gtOper == GT_DIV); + // genCodeForBinary(tree); + instruction ins = genGetInsForOper(tree); + emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); + } + else // an integer divide operation + { + GenTree* divisorOp = tree->gtGetOp2(); + // divisorOp can be immed or reg + assert(!divisorOp->isContained() || divisorOp->isContainedIntOrIImmed()); + + if (divisorOp->IsIntegralConst(0) || divisorOp->GetRegNum() == REG_R0) + { + // We unconditionally throw a divide by zero exception + genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); + } + else // the divisor is not the constant zero + { + GenTree* src1 = tree->gtOp1; + unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet())); + emitAttr size = EA_ATTR(TypeSize); + + assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) && + TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); + + // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + regNumber Reg1 = src1->GetRegNum(); + regNumber divisorReg = divisorOp->GetRegNum(); + instruction ins; + + // Check divisorOp first as we can always allow it to be a contained immediate + if (divisorOp->isContainedIntOrIImmed()) + { + ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); + divisorReg = REG_R21; + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (tree->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!divisorOp->isContainedIntOrIImmed()); + ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); + Reg1 = REG_R21; + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + + // Generate the require runtime checks for GT_DIV or GT_UDIV + if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD) + { + // Two possible exceptions: + // (AnyVal / 0) => DivideByZeroException + // (MinInt / -1) => ArithmeticException + // + bool checkDividend = true; + + // Do we have an immediate for the 'divisorOp'? + // + if (divisorOp->IsCnsIntOrI()) + { + ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + // assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + if (intConstValue != -1) + { + checkDividend = false; // We statically know that the dividend is not -1 + } + } + else // insert check for divison by zero + { + // Check if the divisor is zero throw a DivideByZeroException + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (checkDividend) + { + // Check if the divisor is not -1 branch to 'sdivLabel' + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); + BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64. 
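The overflow check emitted just below detects the (MinInt / -1) case without materializing MinInt: once the dividend is known to be non-zero, doubling it with add_w/add_d wraps to zero only for MinInt. A sketch of the 32-bit case (illustrative, not part of the patch):

    #include <cstdint>
    static bool DivWouldOverflow32(int32_t dividend, int32_t divisor)
    {
        if (divisor != -1 || dividend == 0)          // the bne/beq branches to sdivLabel skip the check
            return false;
        uint32_t doubled = (uint32_t)dividend * 2u;  // add_w $r21, dividend, dividend (wrap-around)
        return doubled == 0;                         // beq $r21, $zero -> throw SCK_ARITH_EXCPN
    }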
+ emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg); + + // If control flow continues past here the 'divisorReg' is known to be -1 + regNumber dividendReg = tree->gtGetOp1()->GetRegNum(); + // At this point the divisor is known to be -1 + // + // Wether dividendReg is MinInt or not + // + + emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0); + + emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg, + dividendReg); + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21); + genDefineTempLabel(sdivLabel); + } + + // Generate the sdiv instruction + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_div_w; + } + else + { + ins = INS_mod_w; + } + } + else + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_div_d; + } + else + { + ins = INS_mod_d; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + else // if (tree->gtOper == GT_UDIV) GT_UMOD + { + // Only one possible exception + // (AnyVal / 0) => DivideByZeroException + // + // Note that division by the constant 0 was already checked for above by the + // op2->IsIntegralConst(0) check + // + + if (!divisorOp->IsCnsIntOrI()) + { + // divisorOp is not a constant, so it could be zero + // + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_div_wu; + } + else + { + ins = INS_mod_wu; + } + + // TODO-LOONGARCH64: here is just for signed-extension ? + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); + } + else + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_div_du; + } + else + { + ins = INS_mod_du; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + } + } + genProduceReg(tree); +} + +// Generate code for InitBlk by performing a loop unroll +// Preconditions: +// a) Both the size and fill byte value are integer constants. +// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. 
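As a worked example of the unrolling below, assume REGSIZE_BYTES == 8 and a hypothetical 23-byte layout: the first loop emits one pair of st_d stores at offsets 0 and 8, and the tail loop then emits st_w at 16, st_h at 20 and st_b at 22. A sketch of the same plan:

    #include <cstdio>
    static void PrintInitBlkUnrollPlan(unsigned size)
    {
        unsigned offset = 0;
        for (unsigned chunk = 16; size >= chunk; size -= chunk, offset += chunk)
            printf("st_d at %u and %u\n", offset, offset + 8); // paired 8-byte stores
        for (unsigned chunk = 8; size > 0; size -= chunk, offset += chunk)
        {
            while (chunk > size)
                chunk /= 2;                                    // 8 -> 4 -> 2 -> 1, as in the tail loop
            printf("%u-byte store at %u\n", chunk, offset);
        }
    }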
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) +{ + assert(node->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + regNumber srcReg; + GenTree* src = node->Data(); + + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->gtGetOp1(); + } + + if (!src->isContained()) + { + srcReg = genConsumeReg(src); + } + else + { + assert(src->IsIntegralConst(0)); + srcReg = REG_R0; + } + + if (node->IsVolatile()) + { + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + unsigned size = node->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(dstOffset < INT32_MAX - static_cast<int>(size)); + + for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) + { + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset); + emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset + 8); + } + } + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + instruction storeIns; + emitAttr attr; + + switch (regSize) + { + case 1: + storeIns = INS_st_b; + attr = EA_4BYTE; + break; + case 2: + storeIns = INS_st_h; + attr = EA_4BYTE; + break; + case 4: + storeIns = INS_st_w; + attr = EA_ATTR(regSize); + break; + case 8: + storeIns = INS_st_d; + attr = EA_ATTR(regSize); + break; + default: + unreached(); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, srcReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset); + } + } +} + +// Generate code for CpObj nodes which copy structs that have interleaved +// GC pointers. +// For this case we'll generate a sequence of loads/stores in the case of struct +// slots that don't contain GC pointers. 
The generated code will look like: +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// +// In the case of a GC-Pointer we'll call the ByRef write barrier helper +// who happens to use the same registers as the previous call to maintain +// the same register requirements and register killsets: +// bl CORINFO_HELP_ASSIGN_BYREF +// +// So finally an example would look like this: +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// bl CORINFO_HELP_ASSIGN_BYREF +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +// bl CORINFO_HELP_ASSIGN_BYREF +// ld tempReg, 8(A5) +// sd tempReg, 8(A6) +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + GenTree* dstAddr = cpObjNode->Addr(); + GenTree* source = cpObjNode->Data(); + var_types srcAddrType = TYP_BYREF; + bool sourceIsLocal = false; + + assert(source->isContained()); + if (source->gtOper == GT_IND) + { + GenTree* srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + srcAddrType = srcAddr->TypeGet(); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + } + + bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); + +#ifdef DEBUG + assert(!dstAddr->isContained()); + + // This GenTree node has data about GC pointers, this means we're dealing + // with CpObj. + assert(cpObjNode->GetLayout()->HasGCPtr()); +#endif // DEBUG + + // Consume the operands and get them into the right registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); + + ClassLayout* layout = cpObjNode->GetLayout(); + unsigned slots = layout->GetSlotCount(); + + // Temp register(s) used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = REG_NA; + + assert(genIsValidIntReg(tmpReg)); + assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); + assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); + + if (slots > 1) + { + tmpReg2 = cpObjNode->GetSingleTempReg(); + assert(tmpReg2 != tmpReg); + assert(genIsValidIntReg(tmpReg2)); + assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); + assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile CpObj operation + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + + emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType); + emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet()); + + // If we can prove it's on the stack we don't need to use the write barrier. 
+ if (dstOnStack) + { + unsigned i = 0; + // Check if two or more remaining slots and use two ld/sd sequence + while (i < slots - 1) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + emitAttr attr1 = emitTypeSize(layout->GetGCPtrType(i + 1)); + + emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE); + i += 2; + } + + // Use a ld/sd sequence for the last remainder + if (i < slots) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + + emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + TARGET_POINTER_SIZE); + } + } + else + { + unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); + + unsigned i = 0; + while (i < slots) + { + if (!layout->IsGCPtr(i)) + { + // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd + if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + ++i; // extra increment of i, since we are copying two items + } + else + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + } + } + else + { + // In the case of a GC-Pointer we'll call the ByRef write barrier helper + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + gcPtrCount--; + } + ++i; + } + assert(gcPtrCount == 0); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a INS_BARRIER_RMB after a volatile CpObj operation + // TODO-LOONGARCH64: there is only BARRIER_FULL for LOONGARCH64. + instGen_MemoryBarrier(BARRIER_FULL); + } + + // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. + // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. 
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); +} + +// generate code do a switch statement based on a table of ip-relative offsets +void CodeGen::genTableBasedSwitch(GenTree* treeNode) +{ + genConsumeOperands(treeNode->AsOp()); + regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum(); + regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum(); + + regNumber tmpReg = treeNode->GetSingleTempReg(); + + // load the ip-relative offset (which is relative to start of fgFirstBB) + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, idxReg, 2); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_8BYTE, baseReg, baseReg, REG_R21); + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); + + // add it to the absolute address of fgFirstBB + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); + + // jr baseReg + GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, baseReg, 0); +} + +// emits the table and an instruction to get the address of the first element +void CodeGen::genJumpTable(GenTree* treeNode) +{ + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + assert(treeNode->OperGet() == GT_JMPTABLE); + + unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; + BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; + unsigned jmpTabOffs; + unsigned jmpTabBase; + + jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true); + + jmpTabOffs = 0; + + JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase); + + for (unsigned i = 0; i < jumpCount; i++) + { + BasicBlock* target = *jumpTable++; + noway_assert(target->bbFlags & BBF_HAS_LABEL); + + JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum); + + GetEmitter()->emitDataGenData(i, target); + }; + + GetEmitter()->emitDataGenEnd(); + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + GetEmitter()->emitIns_R_C(INS_bl, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA, + compiler->eeFindJitDataOffs(jmpTabBase), 0); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. +// +// Arguments: +// treeNode - the GT_XADD/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
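The table emitted by genJumpTable above holds, for each case, a 32-bit offset of the target block relative to the first basic block, so the slli_d/add_d/ld_w sequence in genTableBasedSwitch plus the final add_d/jirl amounts to the following host-side sketch (illustrative, not part of the patch):

    #include <cstdint>
    static uintptr_t SwitchTargetSketch(uintptr_t firstBlockAddr, const int32_t* jumpTable, uint64_t index)
    {
        int32_t offset = jumpTable[index];        // slli_d + add_d to index the table, ld_w to fetch
        return firstBlockAddr + (intptr_t)offset; // add_d with the fgFirstBB address, then jirl $zero, baseReg, 0
    }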
+// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +static inline bool isImmed(GenTree* treeNode) +{ + assert(treeNode->OperIsBinary()); + + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) + { + return true; + } + + return false; +} + +instruction CodeGen::genGetInsForOper(GenTree* treeNode) +{ + var_types type = treeNode->TypeGet(); + genTreeOps oper = treeNode->OperGet(); + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2; + emitAttr attr = emitActualTypeSize(treeNode); + bool isImm = false; + + instruction ins = INS_break; + + if (varTypeIsFloating(type)) + { + switch (oper) + { + case GT_ADD: + if (attr == EA_4BYTE) + { + ins = INS_fadd_s; + } + else + { + ins = INS_fadd_d; + } + break; + case GT_SUB: + if (attr == EA_4BYTE) + { + ins = INS_fsub_s; + } + else + { + ins = INS_fsub_d; + } + break; + case GT_MUL: + if (attr == EA_4BYTE) + { + ins = INS_fmul_s; + } + else + { + ins = INS_fmul_d; + } + break; + case GT_DIV: + if (attr == EA_4BYTE) + { + ins = INS_fdiv_s; + } + else + { + ins = INS_fdiv_d; + } + break; + case GT_NEG: + if (attr == EA_4BYTE) + { + ins = INS_fneg_s; + } + else + { + ins = INS_fneg_d; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - float"); + unreached(); + break; + } + } + else + { + switch (oper) + { + case GT_ADD: + isImm = isImmed(treeNode); + if (isImm) + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_addi_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addi_w; + } + } + else + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_add_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_add_w; + } + } + break; + + case GT_SUB: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_sub_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_sub_w; + } + break; + + case GT_MOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_w; + } + break; + + case GT_DIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_w; + } + break; + + case GT_UMOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_wu; + } + break; + + case GT_UDIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_wu; + } + break; + + case GT_MUL: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + op2 = treeNode->gtGetOp2(); + if (genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) + ins = treeNode->IsUnsigned() ? INS_mulw_d_wu : INS_mulw_d_w; + else + ins = INS_mul_d; + } + else + { + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + ins = INS_mulw_d_wu; + else + ins = INS_mul_w; + } + break; + + case GT_NEG: + if (attr == EA_8BYTE) + { + ins = INS_dneg; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_neg; + } + break; + + case GT_NOT: + ins = INS_not; + break; + + case GT_AND: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_andi; + } + else + { + ins = INS_and; + } + break; + + case GT_AND_NOT: + assert(!isImmed(treeNode)); + ins = INS_andn; + break; + + case GT_OR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_ori; + } + else + { + ins = INS_or; + } + break; + + case GT_LSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. 
+ if (attr == EA_4BYTE) + { + ins = INS_slli_w; + } + else + { + ins = INS_slli_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sll_w; + } + else + { + ins = INS_sll_d; + } + } + break; + + case GT_RSZ: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_srli_w; + } + else + { + ins = INS_srli_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_srl_w; + } + else + { + ins = INS_srl_d; + } + } + break; + + case GT_RSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_srai_w; + } + else + { + ins = INS_srai_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sra_w; + } + else + { + ins = INS_sra_d; + } + } + break; + + case GT_ROR: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_rotri_w; + } + else + { + ins = INS_rotri_d; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_rotr_w; + } + else + { + ins = INS_rotr_d; + } + } + break; + + case GT_XOR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_xori; + } + else + { + ins = INS_xor; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - integer"); + unreached(); + break; + } + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. +// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + BasicBlock* skipLabel = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); + + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + if (addr == nullptr) + { + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + // TODO-LOONGARCH64: maybe optimize further. + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(callTarget); + } + else + { + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + } + + // TODO-LOONGARCH64: can optimize further !!! 
+ GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + genDefineTempLabel(skipLabel); + + regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + regSet.verifyRegistersUsed(killMask); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ +#ifdef FEATURE_SIMD + // Storing Vector3 of size 12 bytes through indirection + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, + // as that is where 'addr' must go. + noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF); + + // 'addr' goes into REG_T6 (REG_WRITE_BARRIER_DST) + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); + + // 'data' goes into REG_T7 (REG_WRITE_BARRIER_SRC) + genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. + genConsumeAddress(addr); + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + regNumber dataReg; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else // data is not contained, so evaluate it into a register + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + // issue a full memory barrier before a volatile StInd + instGen_MemoryBarrier(); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------ +// genIntToFloatCast: Generate code to cast an int/long to float/double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType= int32/uint32/int64/uint64 and DstType=float/double. 
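For the unsigned 8-byte source handled below, the value may not fit a signed convert, so the emitted code halves it while keeping a sticky low bit, converts, and doubles the result. A C++ equivalent of that path (a sketch assuming round-to-nearest; not part of the patch):

    #include <cstdint>
    static double UInt64ToDoubleSketch(uint64_t x)
    {
        if ((int64_t)x >= 0)
            return (double)(int64_t)x;         // top bit clear: plain ffint_d_l (the bge skips the fixup)
        uint64_t half = (x >> 1) | (x & 1);    // srli_d + andi + or: halve, keep a sticky bit
        double   d    = (double)(int64_t)half; // ffint_d_l on the now-positive value
        return d + d;                          // fadd_d result, result: scale back up by two
    }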
+// +void CodeGen::genIntToFloatCast(GenTree* treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(dstType); + + // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); + + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; + + genConsumeOperands(treeNode->AsOp()); + + if (IsUnsigned) + { + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 + + if (srcSize == EA_8BYTE) + { + ssize_t imm = 4 << 2; + emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); + + emit->emitIns_R_R_I(INS_andi, EA_8BYTE, REG_R21, op1->GetRegNum(), 1); + emit->emitIns_R_R_I(INS_srli_d, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), 1); + emit->emitIns_R_R_R(INS_or, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), REG_R21); + } + else + { + srcSize = EA_8BYTE; + emit->emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, op1->GetRegNum(), REG_R0, 63, 32); + } + } + + ins = srcSize == EA_8BYTE ? INS_movgr2fr_d : INS_movgr2fr_w; + emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), op1->GetRegNum()); + + if (dstType == TYP_DOUBLE) + { + if (srcSize == EA_4BYTE) + { + ins = INS_ffint_d_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_ffint_d_l; + } + } + else + { + assert(dstType == TYP_FLOAT); + if (srcSize == EA_4BYTE) + { + ins = INS_ffint_s_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_ffint_s_l; + } + } + + emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), treeNode->GetRegNum()); + + if (IsUnsigned) + { + srcSize = EA_ATTR(genTypeSize(srcType)); + emit->emitIns_R_R(INS_movfr2gr_d, attr, op1->GetRegNum(), REG_SCRATCH_FLT); // recover op1 + + if (srcSize == EA_8BYTE) + { + ssize_t imm = 3 << 2; + emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); + + emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, + treeNode->GetRegNum()); + emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), + REG_SCRATCH_FLT, treeNode->GetRegNum()); + } + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToIntCast: Generate code to cast float/double to int/long +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType=float/double and DstType= int32/uint32/int64/uint64 +// +void CodeGen::genFloatToIntCast(GenTree* treeNode) +{ + // we don't expect to see overflow detecting float/double --> int type conversions here + // as they should have been converted into helper calls by front-end. 
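The unsigned destinations below are handled by comparing against 2^31 or 2^63 (the lu12i_w/lu52i_d immediates 0x4f000/0x5f000/0x41e00/0x43e00 build exactly those constants), subtracting the threshold when needed, truncating, and OR-ing the top bit back in. A sketch of the double-to-uint64 path (illustrative; assumes the input is in range):

    #include <cstdint>
    static uint64_t DoubleToUInt64Sketch(double d)
    {
        const double two63 = 9223372036854775808.0;           // 0x43E0000000000000, built with lu52i_d
        if (d < two63)                                         // fcmp_clt_d sets cc; bcnez skips the fixup
            return (uint64_t)(int64_t)d;                       // ftintrz_l_d + movfr2gr_d
        return (uint64_t)(int64_t)(d - two63) | (1ull << 63);  // fsub_d, ftintrz_l_d, then or with $r21
    }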
+ assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); + + // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE + // For conversions to small types (byte/sbyte/int16/uint16) from float/double, + // we expect the front-end or lowering phase to have generated two levels of cast. + // + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); + noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); + + instruction ins1 = INS_invalid; + instruction ins2 = INS_invalid; + bool IsUnsigned = varTypeIsUnsigned(dstType); + + regNumber tmpReg = REG_SCRATCH_FLT; + assert(tmpReg != op1->GetRegNum()); + + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + ins1 = INS_ftintrz_w_d; + ins2 = INS_movfr2gr_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins1 = INS_ftintrz_l_d; + ins2 = INS_movfr2gr_d; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + ins1 = INS_ftintrz_w_s; + ins2 = INS_movfr2gr_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins1 = INS_ftintrz_l_s; + ins2 = INS_movfr2gr_d; + } + } + + genConsumeOperands(treeNode->AsOp()); + + if (IsUnsigned) + { + ssize_t imm = 0; + + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + imm = 0x41e00; + } + else + { + imm = 0x43e00; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + imm = 0x4f000; + } + else + { + imm = 0x5f000; + } + } + + if (srcType == TYP_DOUBLE) + GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8); + else + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); + + GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21); + + GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), + tmpReg, 2); + + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0); + GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2); + + GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), + tmpReg); + + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1); + GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, + dstSize == EA_8BYTE ? 63 : 31); + + GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2); + + GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, tmpReg); + GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg); + + GetEmitter()->emitIns_R_R_R(INS_or, dstSize, treeNode->GetRegNum(), REG_R21, treeNode->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, op1->GetRegNum()); + GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCkfinite: Generate code for ckfinite opcode. +// +// Arguments: +// treeNode - The GT_CKFINITE node +// +// Return Value: +// None. 
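genCkfinite below tests for NaN or infinity by extracting the exponent field with bstrpick and XOR-ing it against the all-ones pattern; a zero result takes the ArithmeticException path. C++ equivalent for the double case (a sketch, not part of the patch):

    #include <cstdint>
    #include <cstring>
    static bool IsFiniteDoubleSketch(double d)
    {
        uint64_t bits;
        std::memcpy(&bits, &d, sizeof(bits));     // movfr2gr_d
        uint64_t exponent = (bits >> 52) & 0x7FF; // bstrpick_d intReg, intReg, 62, 52
        return (exponent ^ 0x7FF) != 0;           // xori; zero means NaN/Inf -> throw
    }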
+// +// Assumptions: +// GT_CKFINITE node has reserved an internal register. +// +void CodeGen::genCkfinite(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_CKFINITE); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. + int size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + int pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. + + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + + // Extract exponent into a register. + regNumber intReg = treeNode->GetSingleTempReg(); + regNumber fpReg = genConsumeReg(op1); + + emit->emitIns_R_R(attr == EA_8BYTE ? INS_movfr2gr_d : INS_movfr2gr_s, attr, intReg, fpReg); + + // Mask of exponent with all 1's and check if the exponent is all 1's + instruction ins = (targetType == TYP_FLOAT) ? INS_bstrpick_w : INS_bstrpick_d; + emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos + size - 1, pos); + emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask); + + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg); + + // if it is a finite value copy it to targetReg + if (treeNode->GetRegNum() != fpReg) + { + emit->emitIns_R_R(ins_Copy(targetType), attr, treeNode->GetRegNum(), fpReg); + } + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* jtree) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = nullptr; + regNumber targetReg; + if (jtree->OperIs(GT_JTRUE)) + { + tree = jtree->gtGetOp1()->AsOp(); + targetReg = REG_RA; + assert(tree->GetRegNum() == REG_NA); + + jtree->gtOp2 = (GenTree*)REG_RA; // targetReg + jtree->SetRegNum((regNumber)INS_bnez); + } + else + { + tree = jtree; + targetReg = tree->GetRegNum(); + } + assert(targetReg != REG_NA); + + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + if (varTypeIsFloating(op1Type)) + { + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if (IsUnordered) + { + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + } + else + { + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + } + } + + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); + } + else + { + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + assert(!op1->isContainedIntOrIImmed()); + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + switch (cmpSize) + { + case EA_4BYTE: + imm = static_cast<int32_t>(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast<int8_t>(imm); + break; + // case EA_2BYTE: + // imm = static_cast<int16_t>(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + if (tree->OperIs(GT_LT)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && emitter::isValidUimm11(imm)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_LE)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_GT)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + } + } + else if (tree->OperIs(GT_GE)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && emitter::isValidUimm11(imm)) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + if (!imm) + { + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + } + else if (tree->OperIs(GT_EQ)) + { + if (!imm) + { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + + if ((cmpSize == EA_4BYTE) && IsUnsigned) + { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, regOp2, 0); + + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + } + + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generate code for a GT_JTRUE node. +// +// Arguments: +// jtrue - The node +// +void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; + + if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) + { + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, + (int)(int64_t)jtrue->gtOp2); // 5-bits; + jtrue->SetRegNum(REG_NA); + jtrue->gtOp2 = nullptr; + return; + } + else + { + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + bool IsEq = tree == jtrue->gtPrev; + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(targetReg == REG_NA); + int SaveCcResultReg = (int)REG_RA << 5; + + if (varTypeIsFloating(op1Type)) + { + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + // here default use cc = 1 for float comparing. + if (tree->OperIs(GT_EQ)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LT)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + } + + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(1 /*cc*/); + jtrue->SetRegNum((regNumber)ins); + } + } + else + { + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + if (imm) + { + switch (cmpSize) + { + case EA_4BYTE: + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + { + imm = static_cast<uint32_t>(imm); + } + else + { + imm = static_cast<int32_t>(imm); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast<int8_t>(imm); + break; + + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + } + else + { + SaveCcResultReg = 0; + } + + if (tree->OperIs(GT_LT)) + { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) + { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) + { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) + { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? 
INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) + { + SaveCcResultReg |= ((int)regOp1); + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) + { + SaveCcResultReg |= ((int)regOp1); + ins = INS_beq; + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) + { + if (!(op1->gtFlags & GTF_UNSIGNED)) + { + regNumber tmpRegOp1 = rsGetRsvdReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + } + if (!(op2->gtFlags & GTF_UNSIGNED)) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp2 = REG_RA; + } + } + + if (tree->OperIs(GT_LT)) + { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) + { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) + { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_beq; + } + } + + if (IsEq) + { + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; + } + else + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpCompare: Generates code for jmpCompare statement. +// +// A GT_JCMP node is created when a comparison and conditional branch +// can be executed in a single instruction. +// +// LOONGARCH64 has a few instructions with this behavior. +// - beq/bne -- Compare and branch register equal/not equal +// +// The beq/bne supports the normal +/- 2^15 branch range for conditional branches +// +// A GT_JCMP beq/bne node is created when there is a GT_EQ or GT_NE +// integer/unsigned comparison against the value of Rt register which is used by +// a GT_JTRUE condition jump node. 
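In source terms the fused node corresponds to an equality or inequality test feeding a branch; for example (hypothetical C++ analogue), a compare against zero maps directly to beqz/bnez, while a compare against another constant first materializes it in $r21:

    static int JumpCompareSketch(long x)
    {
        if (x == 0)    // beqz x, <target>
            return 1;
        if (x != 1234) // emitIns_I_la $r21, 1234; bne x, $r21, <target>
            return 2;
        return 0;
    }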
+// +// This node is repsonsible for consuming the register, and emitting the +// appropriate fused compare/test and branch instruction +// +// Two flags guide code generation +// GTF_JCMP_EQ -- Set if this is beq rather than bne +// +// Arguments: +// tree - The GT_JCMP tree node. +// +// Return Value: +// None +// +void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + assert(op2->IsCnsIntOrI()); + assert(op2->isContained()); + + genConsumeOperands(tree); + + regNumber reg = op1->GetRegNum(); + emitAttr attr = emitActualTypeSize(op1->TypeGet()); + + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) + { + assert(reg != REG_R21); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if (attr == EA_4BYTE) + { + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + regs = (int)reg << 5; + regs |= (int)REG_R21; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; + } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; +} + +//--------------------------------------------------------------------- +// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer +// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining). +// +int CodeGenInterface::genSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + + int delta = compiler->lvaOutgoingArgSpaceSize; + + assert(delta >= 0); + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + // For varargs functions, we home all the incoming register arguments. They are not + // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but + // since we don't use "push" instructions to save them, we don't have to do the + // save of these varargs register arguments as the first thing in the prolog. + + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize > 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! + +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. 
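A worked example of how the three deltas relate, using hypothetical sizes (outgoing arg area 32 bytes, total frame 96 bytes): SP-to-FP is +32, Caller-SP-to-Initial-SP is -96, and Caller-SP-to-FP is their sum, -64. A sketch mirroring the accessors in this section:

    static int SpToFpDeltaSketch(int outgoingArgSpace)       { return outgoingArgSpace; } // +32
    static int CallerSpToInitialSpSketch(int totalFrameSize) { return -totalFrameSize; }  // -96
    static int CallerSpToFpDeltaSketch(int outgoingArgSpace, int totalFrameSize)
    {
        return CallerSpToInitialSpSketch(totalFrameSize) + SpToFpDeltaSketch(outgoingArgSpace); // -64
    }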
+// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} + +/***************************************************************************** + * Emit a call to a helper function. + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + + if (addr == nullptr) + { + // This is call to a runtime helper. + // li reg, pAddr #NOTE: this maybe muti-instructions. + // ld_d reg, reg + // jirl reg + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + regMaskTP callTargetMask = genRegMask(callTargetReg); + regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + // assert that all registers in callTargetMask are in the callKillSet + noway_assert((callTargetMask & callKillSet) == callTargetMask); + + callTarget = callTargetReg; + + if (compiler->opts.compReloc) + { + // TODO-LOONGARCH64: here the bl is special flag rather than a real instruction. + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(callTarget); + + callType = emitter::EC_INDIR_R; + } + + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regSet.verifyRegistersUsed(killMask); +} + +#ifdef FEATURE_SIMD + +//------------------------------------------------------------------------ +// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main +// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// Currently, we only recognize SIMDVector and SIMDVector, and +// a limited set of methods. +// +// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. 
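When the helper address is not known at JIT time, the sequence above materializes pAddr (the address of an indirection cell) with lu12i_w/lu32i_d, loads the real entry point through it with ldptr_d, and calls indirectly via EC_INDIR_R. What that amounts to at run time (a sketch with a hypothetical helper signature, not part of the patch):

    #include <cstdint>
    typedef void (*HelperFnSketch)();                  // hypothetical signature
    static void CallHelperViaCellSketch(uintptr_t pAddr)
    {
        HelperFnSketch fn = *(HelperFnSketch*)pAddr;   // ldptr_d callTarget, callTarget, lo12 >> 2
        fn();                                          // emitIns_Call with EC_INDIR_R (jirl through callTarget)
    }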
+void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) +{ + NYI("unimplemented on LOONGARCH64 yet"); + return INS_OPTS_NONE; +} + +// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic +// +// Arguments: +// intrinsicId - SIMD intrinsic Id +// baseType - Base type of the SIMD vector +// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode +// +// +// Return Value: +// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. +// +instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) +{ + NYI("unimplemented on LOONGARCH64 yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------------- +// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes +// a number of arguments equal to the length of the Vector. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//---------------------------------------------------------------------------------- +// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// The Widen intrinsics are broken into separate intrinsics for the two results. +// +void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// This intrinsic takes two arguments. The first operand is narrowed to produce the +// lower elements of the results, and the second operand produces the high elements. +// +void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations +// add, sub, mul, bit-wise And, AndNot and Or. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operater +// == and != +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// The upper half of all SIMD registers are volatile, even the callee-save registers. +// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic +// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save +// register. If such a register cannot be found, it will save it to an available caller-save register. +// In that case, this node will be marked GTF_SPILL, which will cause this method to save +// the upper half to the lclVar's home location. +// +void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always +// have their home register, this node has its targetReg on the lclVar child, and its source +// on the simdNode. +// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled +// an upper-half to the lclVar's home location, this node will be marked GTF_SPILLED. +// +void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. 
+// Since Vector3 is not a hardware supported write size, it is performed +// as two writes: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store indirect +// +// +// Return Value: +// None. +// +void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. +// Since Vector3 is not a hardware supported write size, it is performed +// as two loads: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node of GT_IND +// +// +// Return Value: +// None. +// +void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on LOONGARCH64 yet"); +} + +#endif // FEATURE_SIMD + +/***************************************************************************** + * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog + * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late + * disassembler thinks the instructions as the same as we do. + */ + +// Uncomment "#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. +//#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS + +#if defined(DEBUG) +void CodeGen::genLoongArch64EmitterUnitTests() +{ + if (!verbose) + { + return; + } + + if (!compiler->opts.altJit) + { + // No point doing this in a "real" JIT. + return; + } + + // Mark the "fake" instructions in the output. + printf("*************** In genLoongArch64EmitterUnitTests()\n"); + + printf("*************** End of genLoongArch64EmitterUnitTests()\n"); +} +#endif // defined(DEBUG) + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. +// No probe is done. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. +// regTmp - an available temporary register that is used if 'spDelta' cannot be encoded by +// 'sub sp, sp, #spDelta' instruction. +// Can be REG_NA if the caller knows for certain that 'spDelta' fits into the immediate +// value range. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. 
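+    //
+    // If the adjustment fits in a signed 12-bit immediate, SP is adjusted with a single addi_d;
+    // otherwise the delta is first materialized into the reserved scratch register REG_R21 and
+    // added to SP with add_d.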
+ assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + if (emitter::isValidSimm12(spDelta)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + } + else + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, spDelta); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; +} + +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. 
+// +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. + assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + +#ifdef PROFILING_SUPPORTED + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. 
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: + if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + { + treeNode->gtOper = GT_CNS_DBL; + } + FALLTHROUGH; + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + + case GT_ADD: + case GT_SUB: + case GT_MUL: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + genCodeForShift(treeNode); + break; + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode->AsLclVarCommon()); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. + genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_INC_SATURATE: + genCodeForIncSaturate(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CMP: + if (treeNode->GetRegNum() != REG_NA) + { + genCodeForCompare(treeNode->AsOp()); + } + else if (!treeNode->gtNext) + { + genCodeForJumpTrue(treeNode->AsOp()); + } + else if (!treeNode->gtNext->OperIs(GT_JTRUE)) + { + GenTree* treeNode_next = treeNode->gtNext; + while (treeNode_next) + { + if (treeNode_next->OperIs(GT_JTRUE)) + { + break; + } + treeNode_next = treeNode_next->gtNext; + }; + assert(treeNode_next->OperIs(GT_JTRUE)); + // genCodeForJumpTrue(treeNode_next->AsOp()); + genCodeForCompare(treeNode_next->AsOp()); + } + break; + + case GT_JTRUE: + genCodeForJumpTrue(treeNode->AsOp()); + break; + + case GT_JCMP: + genCodeForJumpCompare(treeNode->AsOp()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call 
genConsumeReg() on the GT_COPY + break; + + case GT_FIELD_LIST: + // Should always be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + +#if FEATURE_ARG_SPLIT + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; +#endif // FEATURE_ARG_SPLIT + + case GT_CALL: + genCall(treeNode->AsCall()); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + if (treeNode->AsOp()->gtOp1->isContained()) + { + // For this case we simply need to update the lifetime of the local. + genUpdateLife(treeNode->AsOp()->gtOp1); + } + else + { + genConsumeReg(treeNode->AsOp()->gtOp1); + } + break; + + case GT_NO_OP: + instGen(INS_nop); + break; + + case GT_BOUNDS_CHECK: + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. */ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + +// the runtime side requires the codegen here to be consistent +#ifdef PSEUDORANDOM_NOP_INSERTION + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#else + NYI("unimplemented node"); +#endif + } + break; + } +} + +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. 
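+// The constant is materialized with emitIns_I_la, which may expand to more than one instruction
+// when the value does not fit in a single load-immediate encoding.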
+// +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert((type != TYP_REF) || (val == 0)); + + GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); + regSet.verifyRegUsed(reg); +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); + // initReg = #GlobalSecurityCookieVal; [frame.GSSecurityCookie] = initReg + genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + else + { + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(initReg); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + + *pInitRegZeroed = false; +} + +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). + if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + { + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + } + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. 
+    // We don't have any IR node representing this check, so LSRA can't communicate registers
+    // for us to use.
+
+    regNumber regGSConst = REG_GSCOOKIE_TMP_0;
+    regNumber regGSValue = REG_GSCOOKIE_TMP_1;
+
+    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+    {
+        // load the GS cookie constant into a reg
+        //
+        genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+    }
+    else
+    {
+        //// Ngen case - GS cookie constant needs to be accessed through an indirection.
+        // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+        // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0);
+        if (compiler->opts.compReloc)
+        {
+            GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst,
+                                       (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+        }
+        else
+        {
+            // TODO-LOONGARCH64: maybe optimize further!
+            // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst,
+            //                           (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+            // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, );
+            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst,
+                                      ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12);
+            GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst,
+                                      (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32);
+            GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst,
+                                        ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2);
+        }
+        regSet.verifyRegUsed(regGSConst);
+    }
+    // Load this method's GS value from the stack frame
+    GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+
+    // Compare with the GS cookie constant
+    BasicBlock* gsCheckBlk = genCreateTempLabel();
+    GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue);
+
+    // regGSConst and regGSValue aren't needed anymore; we can use them for the helper call
+    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
+    genDefineTempLabel(gsCheckBlk);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+//    treeNode - the GT_INTRINSIC node
+//
+// Return value:
+//    None
+//
+void CodeGen::genIntrinsic(GenTree* treeNode)
+{
+    NYI("unimplemented on LOONGARCH64 yet");
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for a GT_PUTARG_STK node
+//
+// Arguments
+//    treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+//    None
+//
+void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
+{
+    assert(treeNode->OperIs(GT_PUTARG_STK));
+    GenTree*  source     = treeNode->gtOp1;
+    var_types targetType = genActualType(source->TypeGet());
+    emitter*  emit       = GetEmitter();
+
+    // This is the varNum for our store operations,
+    // typically this is the varNum for the Outgoing arg space.
+    // When we are generating a tail call it will be the varNum for arg0.
+    unsigned varNumOut    = (unsigned)-1;
+    unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks
+
+    // Get argument offset to use with 'varNumOut'.
+    // Here we cross check that the argument offset hasn't changed from lowering to codegen since
+    // we are storing the arg slot number in the GT_PUTARG_STK node in the lowering phase.
+    unsigned argOffsetOut = treeNode->getArgOffset();
+
+#ifdef DEBUG
+    fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
+    assert(curArgTabEntry != nullptr);
+    DEBUG_ARG_SLOTS_ASSERT(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
+#endif // DEBUG
+
+    // Whether to set up the stack arg in the incoming or the outgoing arg area?
+    // Fast tail calls implemented as epilog+jmp: the stack arg is set up in the incoming arg area.
+    // All other calls: the stack arg is set up in the outgoing arg area.
+    if (treeNode->putInIncomingArgArea())
+    {
+        varNumOut    = getFirstArgWithStackSlot();
+        argOffsetMax = compiler->compArgSize;
+#if FEATURE_FASTTAILCALL
+        // This must be a fast tail call.
+        assert(treeNode->gtCall->IsFastTailCall());
+
+        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+        // because a fast tail call requires that the incoming arg area of the caller is >= the outgoing
+        // arg area required for the tail call.
+        LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
+        assert(varDsc != nullptr);
+#endif // FEATURE_FASTTAILCALL
+    }
+    else
+    {
+        varNumOut    = compiler->lvaOutgoingArgSpaceVar;
+        argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+    }
+
+    bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
+
+    if (!isStruct) // a normal non-Struct argument
+    {
+        if (varTypeIsSIMD(targetType))
+        {
+            NYI("unimplemented on LOONGARCH64 yet");
+        }
+
+        instruction storeIns  = ins_Store(targetType);
+        emitAttr    storeAttr = emitTypeSize(targetType);
+
+        // If it is contained then source must be the integer constant zero
+        if (source->isContained())
+        {
+            assert(source->OperGet() == GT_CNS_INT);
+            assert(source->AsIntConCommon()->IconValue() == 0);
+
+            emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut);
+        }
+        else
+        {
+            genConsumeReg(source);
+            if (storeIns == INS_st_w)
+            {
+                emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0);
+                storeIns  = INS_st_d;
+                storeAttr = EA_8BYTE;
+            }
+            emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut);
+        }
+        argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+    }
+    else // We have some kind of a struct argument
+    {
+        assert(source->isContained()); // We expect that this node was marked as contained in Lower
+
+        if (source->OperGet() == GT_FIELD_LIST)
+        {
+            genPutArgStkFieldList(treeNode, varNumOut);
+        }
+        else // We must have a GT_OBJ or a GT_LCL_VAR
+        {
+            noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
+
+            var_types targetType = source->TypeGet();
+            noway_assert(varTypeIsStruct(targetType));
+
+            // Setup loReg from the internal registers that we reserved in lower.
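+            // loReg is used below to copy the struct into the outgoing argument area one
+            // pointer-sized (or smaller) piece at a time.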
+            //
+            regNumber loReg   = treeNode->ExtractTempReg();
+            regNumber addrReg = REG_NA;
+
+            GenTreeLclVarCommon* varNode  = nullptr;
+            GenTree*             addrNode = nullptr;
+
+            if (source->OperGet() == GT_LCL_VAR)
+            {
+                varNode = source->AsLclVarCommon();
+            }
+            else // we must have a GT_OBJ
+            {
+                assert(source->OperGet() == GT_OBJ);
+
+                addrNode = source->AsOp()->gtOp1;
+
+                // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+                //
+                if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+                {
+                    // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+                    //
+                    // We will treat this case the same as above
+                    // (i.e. if we just had this GT_LCL_VAR directly as the source)
+                    // so update 'source' to point to this GT_LCL_VAR_ADDR node
+                    // and continue to the codegen for the LCL_VAR node below
+                    //
+                    varNode  = addrNode->AsLclVarCommon();
+                    addrNode = nullptr;
+                }
+                else // addrNode is used
+                {
+                    // Generate code to load the address that we need into a register
+                    genConsumeAddress(addrNode);
+                    addrReg = addrNode->GetRegNum();
+                }
+            }
+
+            // Either varNode or addrNode must have been set up above;
+            // the xor ensures that only one of the two is set up, not both
+            assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+            ClassLayout* layout;
+
+            // unsigned gcPtrCount; // The count of GC pointers in the struct
+            unsigned srcSize;
+
+            // gcPtrCount = treeNode->gtNumSlots;
+            // Setup the srcSize and layout
+            if (source->OperGet() == GT_LCL_VAR)
+            {
+                assert(varNode != nullptr);
+                LclVarDsc* varDsc = compiler->lvaGetDesc(varNode);
+
+                // This struct also must live in the stack frame
+                // And it can't live in a register (SIMD)
+                assert(varDsc->lvType == TYP_STRUCT);
+                assert(varDsc->lvOnFrame && !varDsc->lvRegister);
+
+                srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
+                                            // as that is how much stack is allocated for this LclVar
+                layout = varDsc->GetLayout();
+            }
+            else // we must have a GT_OBJ
+            {
+                assert(source->OperGet() == GT_OBJ);
+
+                // If the source is an OBJ node then we need to use the type information
+                // it provides (size and GC layout) even if the node wraps a lclvar. Due
+                // to struct reinterpretation (e.g. Unsafe.As) it is possible that
+                // the OBJ node has a different type than the lclvar.
+                CORINFO_CLASS_HANDLE objClass = source->AsObj()->GetLayout()->GetClassHandle();
+
+                srcSize = compiler->info.compCompHnd->getClassSize(objClass);
+                layout  = source->AsObj()->GetLayout();
+            }
+
+            unsigned structSize;
+
+            unsigned dstSize = treeNode->GetStackByteSize();
+            if (dstSize != srcSize)
+            {
+                // We can generate smaller code if the store size is a multiple of TARGET_POINTER_SIZE.
+                // The dst size can be rounded up to the PUTARG_STK size.
+                // The src size can be rounded up if it reads a local variable slot because the local
+                // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE.
+                // The exception is arm64 apple arguments because they can be passed without padding.
+                if (varNode != nullptr)
+                {
+                    // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size.
+                    const LclVarDsc* varDsc       = compiler->lvaGetDesc(varNode);
+                    const unsigned   varStackSize = varDsc->lvSize();
+                    if (varStackSize >= srcSize)
+                    {
+                        srcSize = varStackSize;
+                    }
+                }
+            }
+            if (dstSize == srcSize)
+            {
+                structSize = dstSize;
+            }
+            else
+            {
+                // With an Unsafe object we can have various strange combinations:
+                // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes;
+                // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local.
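+                // Copy only min(dstSize, srcSize) bytes so that the store never exceeds the outgoing argument slot.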
+ structSize = min(dstSize, srcSize); + } + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize > 0) + { + var_types type; + + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(nextIndex); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + // the left over size is smaller than a pointer and thus can never be a GC type + assert(!layout->IsGCPtr(nextIndex)); + + if (remainingSize == 1) + { + type = TYP_UBYTE; + } + else if (remainingSize == 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 4); + type = TYP_UINT; + } + } + const emitAttr attr = emitTypeSize(type); + const unsigned moveSize = genTypeSize(type); + assert(EA_SIZE_IN_BYTES(attr) == moveSize); + + remainingSize -= moveSize; + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + } + else + { + assert(loReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, loReg, addrReg, structOffset); + } + + // Emit a store instruction to store the register into the outgoing argument area + instruction storeIns = ins_Store(type); + emit->emitIns_S_R(storeIns, attr, loReg, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + + structOffset += moveSize; + nextIndex++; + } + } + } +} + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a GT_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + assert(targetType != TYP_STRUCT); + + GenTree* op1 = tree->gtOp1; + genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + else if (emitter::isFloatReg(targetReg)) + { + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } + else + { + assert(!emitter::isFloatReg(targetReg)); + GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } + } + genProduceReg(tree); +} + +#if FEATURE_ARG_SPLIT +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// tree - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTree* source = treeNode->gtOp1; + emitter* emit = GetEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + if (source->OperGet() == GT_FIELD_LIST) + { + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + unsigned regIndex = 0; + unsigned firstOnStackOffs = UINT_MAX; + + for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) + { + GenTree* nextArgNode = use.GetNode(); + regNumber fieldReg = nextArgNode->GetRegNum(); + 
genConsumeReg(nextArgNode); + + if (regIndex >= treeNode->gtNumRegs) + { + if (firstOnStackOffs == UINT_MAX) + { + firstOnStackOffs = use.GetOffset(); + } + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; + // We can't write beyond the outgoing arg area + assert(offset + EA_SIZE_IN_BYTES(attr) <= argOffsetMax); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); + } + else + { + var_types type = treeNode->GetRegType(regIndex); + regNumber argReg = treeNode->GetRegNumByIdx(regIndex); + + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + regIndex++; + } + } + } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_OBJ); + assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + assert(varNode->isContained()); + srcVarNum = varNode->GetLclNum(); + assert(srcVarNum < compiler->lvaCount); + + // handle promote situation + LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + // We don't split HFA struct + assert(!varDsc->lvIsHfa()); + } + else // addrNode is used + { + assert(addrNode != nullptr); + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + + // If addrReg equal to baseReg, we use the last target register as alternative baseReg. + // Because the candidate mask for the internal baseReg does not include any of the target register, + // we can ensure that baseReg, addrReg, and the last target register are not all same. 
+            assert(baseReg != addrReg);
+
+            // We don't split HFA struct
+            assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle()));
+        }
+
+        ClassLayout* layout = source->AsObj()->GetLayout();
+
+        // Put on stack first
+        unsigned nextIndex     = treeNode->gtNumRegs;
+        unsigned structOffset  = nextIndex * TARGET_POINTER_SIZE;
+        int      remainingSize = treeNode->GetStackByteSize();
+        unsigned argOffsetOut  = treeNode->getArgOffset();
+
+        // remainingSize is always multiple of TARGET_POINTER_SIZE
+        assert(remainingSize % TARGET_POINTER_SIZE == 0);
+        while (remainingSize > 0)
+        {
+            var_types type = layout->GetGCPtrType(nextIndex);
+
+            if (varNode != nullptr)
+            {
+                // Load from our varNumImp source
+                emit->emitIns_R_S(INS_ld_d, emitTypeSize(type), baseReg, srcVarNum, structOffset);
+            }
+            else
+            {
+                // check for case of destroying the addrRegister while we still need it
+                assert(baseReg != addrReg);
+
+                // Load from our address expression source
+                emit->emitIns_R_R_I(INS_ld_d, emitTypeSize(type), baseReg, addrReg, structOffset);
+            }
+
+            // Emit a store instruction to store the register into the outgoing argument area
+            emit->emitIns_S_R(INS_st_d, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
+
+            argOffsetOut += TARGET_POINTER_SIZE;  // We stored TARGET_POINTER_SIZE bytes of the struct
+            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+            remainingSize -= TARGET_POINTER_SIZE; // We loaded TARGET_POINTER_SIZE bytes of the struct
+            structOffset += TARGET_POINTER_SIZE;
+            nextIndex += 1;
+        }
+
+        // We set up the registers in order, so that `baseReg` is no longer in use by the time we assign
+        // the last target register, in case we had to reuse the last target register for it.
+        structOffset = 0;
+        for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
+        {
+            regNumber targetReg = treeNode->GetRegNumByIdx(idx);
+            var_types type      = treeNode->GetRegType(idx);
+
+            if (varNode != nullptr)
+            {
+                // Load from our varNumImp source
+                emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset);
+            }
+            else
+            {
+                // check for case of destroying the addrRegister while we still need it
+                if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
+                {
+                    assert(targetReg != baseReg);
+                    emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0);
+                    addrReg = baseReg;
+                }
+
+                // Load from our address expression source
+                emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset);
+            }
+            structOffset += TARGET_POINTER_SIZE;
+        }
+    }
+    genProduceReg(treeNode);
+}
+#endif // FEATURE_ARG_SPLIT
+
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+//    treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//    The child of store is a multi-reg call node.
+//    genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
+{
+    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+    // Structs of size >=9 and <=16, as well as HFAs, are returned in two return registers on LOONGARCH64.
+    assert(varTypeIsStruct(treeNode));
+
+    // Assumption: current implementation requires that a multi-reg
+    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+    // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + noway_assert(varDsc->lvIsMultiRegRet); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); + GenTreeCall* call = actualOp1->AsCall(); + assert(call->HasMultiRegRetVal()); + + genConsumeRegs(op1); + + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + if (treeNode->GetRegNum() != REG_NA) + { + NYI("unimplemented on LOONGARCH64 yet"); + // Right now the only enregistrable multi-reg return types supported are SIMD types. + assert(varTypeIsSIMD(treeNode)); + assert(regCount != 0); + + regNumber dst = treeNode->GetRegNum(); + + // Treat dst register as a homogenous vector with element size equal to the src size + // Insert pieces in reverse order + for (int i = regCount - 1; i >= 0; --i) + { + var_types type = pRetTypeDesc->GetReturnRegType(i); + regNumber reg = call->GetRegNumByIdx(i); + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } + + assert(reg != REG_NA); + if (varTypeIsFloating(type)) + { + // If the register piece was passed in a floating point register + // Use a vector mov element instruction + // src is not a vector, so it is in the first element reg[0] + // mov dst[i], reg[0] + // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further + // iterations + // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for + // a temporary + GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0); + } + else + { + // If the register piece was passed in an integer register + // Use a vector mov from general purpose register instruction + // mov dst[i], reg + // This effectively moves from `reg` to `dst[i]` + GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i); + } + } + + genProduceReg(treeNode); + } + else + { + // Stack store + int offset = 0; + var_types type = pRetTypeDesc->GetReturnRegType(0); + regNumber reg = call->GetRegNumByIdx(0); + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } + + assert(reg != REG_NA); + GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + + if (1 < regCount) + { + offset = genTypeSize(type); + type = pRetTypeDesc->GetReturnRegType(1); + reg = call->GetRegNumByIdx(1); + offset = (offset < (int)genTypeSize(type)) ? genTypeSize(type) : offset; + GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + } + + genUpdateLife(treeNode); + varDsc->SetRegNum(REG_STK); + } +} + +//------------------------------------------------------------------------ +// genRangeCheck: generate code for GT_BOUNDS_CHECK node. 
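+//
+// Arguments:
+//    oper - the GT_BOUNDS_CHECK node to generate code for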
+// +void CodeGen::genRangeCheck(GenTree* oper) +{ + noway_assert(oper->OperIs(GT_BOUNDS_CHECK)); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTree* arrLen = bndsChk->GetArrayLength(); + GenTree* arrIndex = bndsChk->GetIndex(); + GenTree* arrRef = NULL; + int lenOffset = 0; + + GenTree* src1; + GenTree* src2; + regNumber reg1; + regNumber reg2; + emitJumpKind jmpKind = EJ_jmp; + + genConsumeRegs(arrIndex); + genConsumeRegs(arrLen); + + emitter* emit = GetEmitter(); + GenTreeIntConCommon* intConst = nullptr; + if (arrIndex->isContainedIntOrIImmed()) + { + src1 = arrLen; + src2 = arrIndex; + reg1 = REG_R21; + reg2 = src1->GetRegNum(); + + intConst = src2->AsIntConCommon(); + ssize_t imm = intConst->IconValue(); + if (imm == INT64_MAX) + { + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); + emit->emitIns_R_R_I(INS_srli_d, EA_PTRSIZE, REG_R21, REG_R21, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + } + } + else + { + src1 = arrIndex; + src2 = arrLen; + reg1 = src1->GetRegNum(); + + if (src2->isContainedIntOrIImmed()) + { + reg2 = REG_R21; + ssize_t imm = src2->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + } + else + { + reg2 = src2->GetRegNum(); + } + } + +#ifdef DEBUG + var_types bndsChkType = genActualType(src2->TypeGet()); + var_types src1ChkType = genActualType(src1->TypeGet()); + // Bounds checks can only be 32 or 64 bit sized comparisons. + assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); + assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG); +#endif // DEBUG + + genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2); +} + +//--------------------------------------------------------------------- +// genCodeForPhysReg - generate code for a GT_PHYSREG node +// +// Arguments +// tree - the GT_PHYSREG node +// +// Return value: +// None +// +void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) +{ + assert(tree->OperIs(GT_PHYSREG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + if (targetReg != tree->gtSrcReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); + genTransferRegGCState(targetReg, tree->gtSrcReg); + } + + genProduceReg(tree); +} + +//--------------------------------------------------------------------- +// genCodeForNullCheck - generate code for a GT_NULLCHECK node +// +// Arguments +// tree - the GT_NULLCHECK node +// +// Return value: +// None +// +void CodeGen::genCodeForNullCheck(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_NULLCHECK)); + assert(!tree->gtOp1->isContained()); + regNumber addrReg = genConsumeReg(tree->gtOp1); + + regNumber targetReg = REG_R0; + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, targetReg, addrReg, 0); +} + +//------------------------------------------------------------------------ +// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, +// producing the effective index by subtracting the lower bound. +// +// Arguments: +// arrIndex - the node for which we're generating code +// +// Return Value: +// None. 
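+//
+// Notes:
+//    REG_R21 is used as the scratch register for loading the lower bound and the dimension size,
+//    so the target register must not be REG_R21.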
+// +void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) +{ + emitter* emit = GetEmitter(); + GenTree* arrObj = arrIndex->ArrObj(); + GenTree* indexNode = arrIndex->IndexExpr(); + regNumber arrReg = genConsumeReg(arrObj); + regNumber indexReg = genConsumeReg(indexNode); + regNumber tgtReg = arrIndex->GetRegNum(); + noway_assert(tgtReg != REG_NA); + + // We will use a temp register to load the lower bound and dimension size values. + + // regNumber tmpReg = arrIndex->GetSingleTempReg(); + assert(tgtReg != REG_R21); + + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + unsigned offset; + + offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim); + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + emit->emitIns_R_R_R(INS_sub_w, EA_4BYTE, tgtReg, indexReg, REG_R21); + + offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, tgtReg, nullptr, REG_R21); + + genProduceReg(arrIndex); +} + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTree* offsetNode = arrOffset->gtOffset; + GenTree* indexNode = arrOffset->gtIndex; + regNumber tgtReg = arrOffset->GetRegNum(); + + noway_assert(tgtReg != REG_NA); + + if (!offsetNode->IsIntegralConst(0)) + { + emitter* emit = GetEmitter(); + regNumber offsetReg = genConsumeReg(offsetNode); + regNumber indexReg = genConsumeReg(indexNode); + regNumber arrReg = genConsumeReg(arrOffset->gtArrObj); + noway_assert(offsetReg != REG_NA); + noway_assert(indexReg != REG_NA); + noway_assert(arrReg != REG_NA); + + // regNumber tmpReg = arrOffset->GetSingleTempReg(); + + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + + // Load tmpReg with the dimension size and evaluate + // tgtReg = offsetReg*tmpReg + indexReg. + emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); + emit->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, REG_R21, offsetReg); + emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tgtReg, REG_R21, indexReg); + } + else + { + regNumber indexReg = genConsumeReg(indexNode); + if (indexReg != tgtReg) + { + GetEmitter()->emitIns_R_R_I(INS_ori, emitActualTypeSize(TYP_INT), tgtReg, indexReg, 0); + } + } + genProduceReg(arrOffset); +} + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. 
+// +void CodeGen::genCodeForShift(GenTree* tree) +{ + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); + + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTree* operand = tree->gtGetOp1(); + GenTree* shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); + } + else + { + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + + // should check shiftByImm for loongarch32-ins. + unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 + shiftByImm &= (immWidth - 1); + + if (ins == INS_slli_w && shiftByImm >= 32) + { + ins = INS_slli_d; + } + else if (ins == INS_slli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_slli_d; + } + else if (ins == INS_srai_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srai_d; + } + else if (ins == INS_srli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srli_d; + } + else if (ins == INS_rotri_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_rotri_d; + } + + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. +// +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) +{ + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); + + // Address of a local var. + noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); + + genProduceReg(lclAddrNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); + assert(targetReg != REG_NA); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. +// +// Arguments: +// tree - the GT_INDEX_ADDR node +// +void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) +{ + GenTree* const base = node->Arr(); + GenTree* const index = node->Index(); + + genConsumeReg(base); + genConsumeReg(index); + + // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the + // base register is multiply-used. 
As such, we need to mark the base register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(base->GetRegNum(), base->TypeGet()); + assert(!varTypeIsGC(index->TypeGet())); + + // The index is never contained, even if it is a constant. + assert(index->isUsedFromReg()); + + // Generate the bounds check if necessary. + if ((node->gtFlags & GTF_INX_RNGCHK) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } + // + // sltu REG_R21, index, REG_R21 + // bne REG_R21, zero, RngChkExit + // IndRngFail: + // ... + // RngChkExit: + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); + } + + emitAttr attr = emitActualTypeSize(node); + // Can we use a shift instruction for multiply ? + // + if (isPow2(node->gtElemSize) && (node->gtElemSize < 0x10000000u)) + { + regNumber tmpReg; + if (node->gtElemSize == 0) + { + // dest = base + index + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); + + // tmpReg = base + index << scale + // dest = base + tmpReg + GetEmitter()->emitIns_R_R_I(INS_slli_d, attr, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), base->GetRegNum(), tmpReg); + } + else // we have to load the element size and use a MADD (multiply-add) instruction + { + // REG_R21 = element size + CodeGen::genSetRegToIcon(REG_R21, (ssize_t)node->gtElemSize, TYP_INT); + + // dest = index * REG_R21 + base + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_mul_w, EA_4BYTE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_w, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + } + + // dest = dest + elemOffs + GetEmitter()->emitIns_R_R_I(INS_addi_d, attr, node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); + + gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForIndir: Produce code for a GT_IND node. +// +// Arguments: +// tree - the GT_IND node +// +void CodeGen::genCodeForIndir(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_IND)); + +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. 
+    if (tree->TypeGet() == TYP_SIMD12)
+    {
+        genLoadIndTypeSIMD12(tree);
+        return;
+    }
+#endif // FEATURE_SIMD
+
+    var_types   type      = tree->TypeGet();
+    instruction ins       = ins_Load(type);
+    instruction ins2      = INS_none;
+    regNumber   targetReg = tree->GetRegNum();
+    regNumber   tmpReg    = targetReg;
+    emitAttr    attr      = emitActualTypeSize(type);
+    int         offset    = 0;
+
+    genConsumeAddress(tree->Addr());
+
+    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+    {
+        instGen_MemoryBarrier(BARRIER_FULL);
+    }
+
+    GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree);
+
+    genProduceReg(tree);
+}
+
+//----------------------------------------------------------------------------------
+// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
+//
+// Arguments:
+//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
+//
+// Preconditions:
+//   The register assignments have been set appropriately.
+//   This is validated by genConsumeBlockOp().
+//
+void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
+{
+    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
+    // genConsumeBlockOp takes care of this for us.
+    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier before a volatile CpBlk operation
+        instGen_MemoryBarrier();
+    }
+
+    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+
+    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier after a volatile CpBlk operation
+        instGen_MemoryBarrier(BARRIER_FULL);
+    }
+}
+
+//----------------------------------------------------------------------------------
+// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
+//
+// Arguments:
+//    cpBlkNode  -  Copy block node
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//  The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
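+//
+// Notes:
+//    A rough sketch of the unrolled pattern (not the verbatim emitted sequence): while at
+//    least 16 bytes remain, the copy uses paired 8-byte ld.d/st.d through the node's temp
+//    register and REG_R21; any remaining tail is copied with progressively narrower
+//    ld.d/ld.w/ld.h/ld.b and the matching stores.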
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+    assert(cpBlkNode->OperIs(GT_STORE_BLK));
+
+    unsigned  dstLclNum      = BAD_VAR_NUM;
+    regNumber dstAddrBaseReg = REG_NA;
+    int       dstOffset      = 0;
+    GenTree*  dstAddr        = cpBlkNode->Addr();
+
+    if (!dstAddr->isContained())
+    {
+        dstAddrBaseReg = genConsumeReg(dstAddr);
+    }
+    else if (dstAddr->OperIsAddrMode())
+    {
+        assert(!dstAddr->AsAddrMode()->HasIndex());
+
+        dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base());
+        dstOffset      = dstAddr->AsAddrMode()->Offset();
+    }
+    else
+    {
+        assert(dstAddr->OperIsLocalAddr());
+        dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
+        dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs();
+    }
+
+    unsigned  srcLclNum      = BAD_VAR_NUM;
+    regNumber srcAddrBaseReg = REG_NA;
+    int       srcOffset      = 0;
+    GenTree*  src            = cpBlkNode->Data();
+
+    assert(src->isContained());
+
+    if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+    {
+        srcLclNum = src->AsLclVarCommon()->GetLclNum();
+        srcOffset = src->AsLclVarCommon()->GetLclOffs();
+    }
+    else
+    {
+        assert(src->OperIs(GT_IND));
+        GenTree* srcAddr = src->AsIndir()->Addr();
+
+        if (!srcAddr->isContained())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr);
+        }
+        else if (srcAddr->OperIsAddrMode())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base());
+            srcOffset      = srcAddr->AsAddrMode()->Offset();
+        }
+        else
+        {
+            assert(srcAddr->OperIsLocalAddr());
+            srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
+            srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs();
+        }
+    }
+
+    if (cpBlkNode->IsVolatile())
+    {
+        // issue a full memory barrier before a volatile CpBlk operation
+        instGen_MemoryBarrier();
+    }
+
+    emitter* emit = GetEmitter();
+    unsigned size = cpBlkNode->GetLayout()->GetSize();
+
+    assert(size <= INT32_MAX);
+    assert(srcOffset < INT32_MAX - static_cast<int>(size));
+    assert(dstOffset < INT32_MAX - static_cast<int>(size));
+
+    regNumber tempReg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
+
+    if (size >= 2 * REGSIZE_BYTES)
+    {
+        regNumber tempReg2 = REG_R21;
+
+        for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize;
+             size -= regSize, srcOffset += regSize, dstOffset += regSize)
+        {
+            if (srcLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg, srcLclNum, srcOffset);
+                emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg2, srcLclNum, srcOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg, srcAddrBaseReg, srcOffset);
+                emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg2, srcAddrBaseReg, srcOffset + 8);
+            }
+
+            if (dstLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg, dstLclNum, dstOffset);
+                emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg2, dstLclNum, dstOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg, dstAddrBaseReg, dstOffset);
+                emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg2, dstAddrBaseReg, dstOffset + 8);
+            }
+        }
+    }
+
+    for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize)
+    {
+        while (regSize > size)
+        {
+            regSize /= 2;
+        }
+
+        instruction loadIns;
+        instruction storeIns;
+        emitAttr    attr;
+
+        switch (regSize)
+        {
+            case 1:
+                loadIns  = INS_ld_b;
+                storeIns = INS_st_b;
+                attr     = EA_4BYTE;
+                break;
+            case 2:
+                loadIns  = INS_ld_h;
+                storeIns = INS_st_h;
+                attr     = EA_4BYTE;
+                break;
+            case 4:
+                loadIns  = INS_ld_w;
+                storeIns = INS_st_w;
+                attr     = EA_ATTR(regSize);
+                break;
+            case 8:
+                loadIns  = INS_ld_d;
+                storeIns = INS_st_d;
+                attr     = EA_ATTR(regSize);
+                break;
+            default:
+                unreached();
+        }
+
+        if (srcLclNum != BAD_VAR_NUM)
+        {
+            emit->emitIns_R_S(loadIns, attr, tempReg, srcLclNum, srcOffset);
+        }
+        else
+        {
+            emit->emitIns_R_R_I(loadIns, attr, tempReg, srcAddrBaseReg, srcOffset);
+        }
+
+        if (dstLclNum != BAD_VAR_NUM)
+        {
+            emit->emitIns_S_R(storeIns, attr, tempReg, dstLclNum, dstOffset);
+        }
+        else
+        {
+            emit->emitIns_R_R_I(storeIns, attr, tempReg, dstAddrBaseReg, dstOffset);
+        }
+    }
+
+    if (cpBlkNode->IsVolatile())
+    {
+        // issue a load barrier after a volatile CpBlk operation
+        instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
+    }
+}
+
+//------------------------------------------------------------------------
+// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memset helper call
+//
+// Arguments:
+//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
+//
+// Preconditions:
+//   The register assignments have been set appropriately.
+//   This is validated by genConsumeBlockOp().
+//
+void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
+{
+    // Destination address goes in arg0, fill value goes in arg1, and size goes in arg2.
+    // genConsumeBlockOp takes care of this for us.
+    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
+    {
+        // issue a full memory barrier before a volatile initBlock operation
+        instGen_MemoryBarrier();
+    }
+
+    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+// Generate code for a load from some address + offset
+//   base: tree node which can be either a local address or arbitrary node
+//   offset: distance from the base from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+{
+    emitter* emit = GetEmitter();
+
+    if (base->OperIsLocalAddr())
+    {
+        if (base->gtOper == GT_LCL_FLD_ADDR)
+        {
+            offset += base->AsLclFld()->GetLclOffs();
+        }
+        emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset);
+    }
+    else
+    {
+        emit->emitIns_R_R_I(ins, size, dst, base->GetRegNum(), offset);
+    }
+}
+
+//------------------------------------------------------------------------
+// genCall: Produce code for a GT_CALL node
+//
+void CodeGen::genCall(GenTreeCall* call)
+{
+    // Consume all the arg regs
+    for (GenTreeCall::Use& use : call->LateArgs())
+    {
+        GenTree* argNode = use.GetNode();
+
+        fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+        assert(curArgTabEntry);
+
+        // GT_RELOAD/GT_COPY use the child node
+        argNode = argNode->gtSkipReloadOrCopy();
+
+        if (curArgTabEntry->GetRegNum() == REG_STK)
+        {
+            continue;
+        }
+
+        // Deal with multi register passed struct args.
+        if (argNode->OperGet() == GT_FIELD_LIST)
+        {
+            regNumber argReg = curArgTabEntry->GetRegNum();
+            for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses())
+            {
+                GenTree* putArgRegNode = use.GetNode();
+                assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+                genConsumeReg(putArgRegNode);
+                var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL;
+                inst_Mov(dstType, argReg, putArgRegNode->GetRegNum(), /* canSkip */ true);
+
+                argReg = genRegArgNext(argReg);
+            }
+        }
+        else if (curArgTabEntry->IsSplit())
+        {
+            NYI("unimplemented on LOONGARCH64 yet");
+        }
+        else
+        {
+            regNumber argReg = curArgTabEntry->GetRegNum();
+            genConsumeReg(argNode);
+            var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL;
+            inst_Mov(dstType, argReg, argNode->GetRegNum(), /* canSkip */ true);
+        }
+    }
+
+    // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0); + } + + // If fast tail call, then we are done here, we just have to load the call + // target into the right registers. We ensure in RA that target is loaded + // into a volatile register that won't be restored by epilog sequence. + if (call->IsFastTailCall()) + { + GenTree* target = getCallTarget(call, nullptr); + + if (target != nullptr) + { + // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. + genConsumeReg(target); + } +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); + assert(call->gtControlExpr == nullptr); + + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); + } +#endif + + return; + } + + // For a pinvoke to unmanaged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (compiler->killGCRefs(call)) + { + genDefineTempLabel(genCreateTempLabel()); + } + + genCallInstruction(call); + + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#ifdef DEBUG + // We should not have GC pointers in killed registers live around the call. + // GC info for arg registers were cleared when consuming arg nodes above + // and LSRA should ensure it for other trashed registers. + regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. 
+ for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); + } + } + else + { + if (varTypeUsesFloatArgReg(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +//------------------------------------------------------------------------ +// genCallInstruction - Generate instructions necessary to transfer control to the call. +// +// Arguments: +// call - the GT_CALL node +// +// Remaks: +// For tailcalls this function will generate a jump. +// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + // Determine return value size(s). + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + } + else + { + assert(call->gtType != TYP_STRUCT); + + if (call->gtType == TYP_REF) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + DebugInfo di; + // We need to propagate the debug information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. + if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (call->gtCallType != CT_HELPER) + { + sigInfo = call->callSig; + } + + if (call->IsFastTailCall()) + { + regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + + // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no + // non-standard args that may be trash if this is a tailcall. 
+ if (compiler->getNeedsGSSecurityCookie()) + { + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + } + + for (unsigned i = 0; i < call->fgArgInfo->ArgCount(); i++) + { + fgArgTabEntry* entry = call->fgArgInfo->GetArgEntry(i); + for (unsigned j = 0; j < entry->numRegs; j++) + { + regNumber reg = entry->GetRegNum(j); + if ((trashedByEpilog & genRegMask(reg)) != 0) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + } + } + } +#endif // DEBUG + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = getCallTarget(call, &methHnd); + + if (target != nullptr) + { + // A call target can not be a contained indirection + assert(!target->isContainedIndir()); + + // For fast tailcall we have already consumed the target. We ensure in + // RA that the target was allocated into a volatile register that will + // not be messed up by epilog sequence. + if (!call->IsFastTailCall()) + { + genConsumeReg(target); + } + + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + // + assert(genIsValidIntReg(target->GetRegNum())); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + target->GetRegNum(), + call->IsFastTailCall()); + // clang-format on + } + else + { + // If we have no target and this is a call with indirection cell then + // we do an optimization where we load the call address directly from + // the indirection cell instead of duplicating the tree. In BuildCall + // we ensure that get an extra register for the purpose. Note that for + // CFG the call might have changed to + // CORINFO_HELP_DISPATCH_INDIRECT_CALL in which case we still have the + // indirection cell but we should not try to optimize. + regNumber callThroughIndirReg = REG_NA; + if (!call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)) + { + callThroughIndirReg = getCallIndirectionCellReg(call); + } + + if (callThroughIndirReg != REG_NA) + { + assert(call->IsR2ROrVirtualStubRelativeIndir()); + regNumber targetAddrReg = call->GetSingleTempReg(); + // For fast tailcalls we have already loaded the call target when processing the call node. + if (!call->IsFastTailCall()) + { + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), targetAddrReg, + callThroughIndirReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((targetAddrReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == targetAddrReg); + } + + // We have now generated code loading the target address from the indirection cell into `targetAddrReg`. + // We just need to emit "bl targetAddrReg" in this case. 
+ // + assert(genIsValidIntReg(targetAddrReg)); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + targetAddrReg, + call->IsFastTailCall()); + // clang-format on + } + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(call->gtCallType == CT_HELPER || call->gtCallType == CT_USER_FUNC); + + void* addr = nullptr; +#ifdef FEATURE_READYTORUN + if (call->gtEntryPoint.addr != NULL) + { + assert(call->gtEntryPoint.accessType == IAT_VALUE); + addr = call->gtEntryPoint.addr; + } + else +#endif // FEATURE_READYTORUN + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else + { + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; + } + + assert(addr != nullptr); + + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } + } +} + +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. +void CodeGen::genJmpMethod(GenTree* jmp) +{ + assert(jmp->OperGet() == GT_JMP); + assert(compiler->compJmpOpUsed); + + // If no arguments, nothing to do + if (compiler->info.compArgsCount == 0) + { + return; + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + unsigned varNum; + LclVarDsc* varDsc; + + // First move any en-registered stack arguments back to the stack. + // At the same time any reg arg not in correct reg is moved back to its stack location. + // + // We are not strictly required to spill reg args that are not in the desired reg for a jmp call + // But that would require us to deal with circularity while moving values around. Spilling + // to stack makes the implementation simple, which is not a bad trade off given Jmp calls + // are not frequent. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK)) + { + // Skip reg args which are already in its right register for jmp call. + // If not, we will spill such args to their stack locations. + // + // If we need to generate a tail call profiler hook, then spill all + // arg regs to free them up for the callback. + if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + { + continue; + } + } + else if (varDsc->GetRegNum() == REG_STK) + { + // Skip args which are currently living in stack. + continue; + } + + // If we came here it means either a reg argument not in the right register or + // a stack argument currently living in a register. 
In either case the following + // assert should hold. + assert(varDsc->GetRegNum() != REG_STK); + assert(varDsc->TypeGet() != TYP_STRUCT); + var_types storeType = varDsc->GetStackSlotHomeType(); + emitAttr storeSize = emitActualTypeSize(storeType); + + GetEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->GetRegNum(), varNum, 0); + // Update GetRegNum() life and GC info to indicate GetRegNum() is dead and varDsc stack slot is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. + // Therefore manually update life of varDsc->GetRegNum(). + regMaskTP tempMask = genRegMask(varDsc->GetRegNum()); + regSet.RemoveMaskVars(tempMask); + gcInfo.gcMarkRegSetNpt(tempMask); + if (compiler->lvaIsGCTracked(varDsc)) + { + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + } + } + +#ifdef PROFILING_SUPPORTED + // At this point all arg regs are free. + // Emit tail call profiler callback. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif + + // Next move any un-enregistered register arguments back to their register. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + // Skip if arg not passed in a register. + if (!varDsc->lvIsRegArg) + { + continue; + } + + // Register argument + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + + // Is register argument already in the right register? + // If not load it from its stack location. + regNumber argReg = varDsc->GetArgReg(); // incoming arg register + regNumber argRegNext = REG_NA; + + if (varDsc->GetRegNum() != argReg) + { + var_types loadType = TYP_UNDEF; + + // NOTE for LOONGARCH: not supports the HFA. + assert(!varDsc->lvIsHfaRegArg()); + { + if (varTypeIsStruct(varDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= 16); + if (emitter::isFloatReg(argReg)) + { + loadType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + } + else + loadType = varDsc->GetLayout()->GetGCPtrType(0); + } + else + { + loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + } + + emitAttr loadSize = emitActualTypeSize(loadType); + GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting + // it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.AddMaskVars(genRegMask(argReg)); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + + if (varDsc->GetOtherArgReg() < REG_STK) + { + // Restore the second register. + argRegNext = varDsc->GetOtherArgReg(); + + if (emitter::isFloatReg(argRegNext)) + { + loadType = varDsc->lvIs4Field2 ? 
TYP_FLOAT : TYP_DOUBLE; + } + else + loadType = varDsc->GetLayout()->GetGCPtrType(1); + + loadSize = emitActualTypeSize(loadType); + int offs = loadSize == EA_4BYTE ? 4 : 8; + GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, offs); + + regSet.AddMaskVars(genRegMask(argRegNext)); + gcInfo.gcMarkRegPtrVal(argRegNext, loadType); + } + + if (compiler->lvaIsGCTracked(varDsc)) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + } + + if (compiler->info.compIsVarArgs) + { + NYI_LOONGARCH64("genJmpMethod unsupports compIsVarArgs"); + } + } +} + +//------------------------------------------------------------------------ +// genIntCastOverflowCheck: Generate overflow checking code for an integer cast. +// +// Arguments: +// cast - The GT_CAST node +// desc - The cast description +// reg - The register containing the value to check +// +void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) +{ + switch (desc.CheckKind()) + { + case GenIntCastDesc::CHECK_POSITIVE: + { + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0); + } + break; + + case GenIntCastDesc::CHECK_UINT_RANGE: + { + // We need to check if the value is not greater than 0xFFFFFFFF + // if the upper 32 bits are zero. + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 32); + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + break; + + case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE: + { + // We need to check if the value is not greater than 0x7FFFFFFF + // if the upper 33 bits are zero. + // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 31); + + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + break; + + case GenIntCastDesc::CHECK_INT_RANGE: + { + const regNumber tempReg = cast->GetSingleTempReg(); + assert(tempReg != reg); + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg); + + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg); + } + break; + + default: + { + assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); + instruction ins; + + if (castMaxValue > 2047) + { + assert((castMaxValue == 32767) || (castMaxValue == 65535)); + GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); + ins = castMinValue == 0 ? INS_bgeu : INS_bge; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); + ins = castMinValue == 0 ? 
INS_bltu : INS_blt; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); + } + + if (castMinValue != 0) + { + if (emitter::isValidSimm12(castMinValue)) + { + GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); + } + else + { + GetEmitter()->emitIns_I_la(EA_8BYTE, REG_R21, castMinValue); + GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, REG_R21); + } + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + } + break; + } +} + +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for an integer cast, with or without overflow check. +// +// Arguments: +// cast - The GT_CAST node +// +// Assumptions: +// The cast node is not a contained node and must have an assigned register. +// Neither the source nor target type can be a floating point type. +// +// TODO-LOONGARCH64-CQ: Allow castOp to be a contained node without an assigned register. +// +void CodeGen::genIntToIntCast(GenTreeCast* cast) +{ + genConsumeRegs(cast->gtGetOp1()); + + emitter* emit = GetEmitter(); + var_types dstType = cast->CastToType(); + var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + const unsigned char pos = 0; + const unsigned char size = 32; + + assert(genIsValidIntReg(srcReg)); + assert(genIsValidIntReg(dstReg)); + + GenIntCastDesc desc(cast); + + if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) + { + genIntCastOverflowCheck(cast, desc, srcReg); + } + + if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + { + instruction ins; + + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + if (desc.ExtendSrcSize() == 1) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos); + } + else + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos); + } + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h; + emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg); + break; + + case GenIntCastDesc::ZERO_EXTEND_INT: + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + break; + + default: + assert(desc.ExtendKind() == GenIntCastDesc::COPY); + if (srcType == TYP_INT) + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + } + else + { + emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); + } + break; + } + } + + genProduceReg(cast); +} + +//------------------------------------------------------------------------ +// genFloatToFloatCast: Generate code for a cast between float and double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// The cast is between float and double. 
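+//
+// Notes:
+//    A sketch of the emitted code: a float-to-double widening uses fcvt.d.s, a double-to-float
+//    narrowing uses fcvt.s.d, and a same-type "cast" whose source and destination registers
+//    differ is emitted as a register-to-register move (fmov.s or fmov.d).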
+// +void CodeGen::genFloatToFloatCast(GenTree* treeNode) +{ + // float <--> double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + genConsumeOperands(treeNode->AsOp()); + + // treeNode must be a reg + assert(!treeNode->isContained()); + + if (srcType != dstType) + { + instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s // convert Single to Double + : INS_fcvt_s_d; // convert Double to Single + + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); + } + else if (treeNode->GetRegNum() != op1->GetRegNum()) + { + // If double to double cast or float to float cast. Emit a move instruction. + instruction ins = (srcType == TYP_FLOAT) ? INS_fmov_s : INS_fmov_d; + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCreateAndStoreGCInfo: Create and record GC Info for the function. +// +void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder != nullptr); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // We keep the call count for the second call to gcMakeRegPtrTable() below. + unsigned callCnt = 0; + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); + + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + + // Now we can actually use those slot ID's to declare live ranges. 
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { + preservedAreaSize += REGSIZE_BYTES; + } + + preservedAreaSize += 1; // bool for synchronized methods + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } + + if (compiler->opts.IsReversePInvoke()) + { + unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; + assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); + gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); + } + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} + +//------------------------------------------------------------------------ +// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) +{ + assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK)); + + if (blkOp->OperIs(GT_STORE_OBJ)) + { + assert(!blkOp->gtBlkOpGcUnsafe); + assert(blkOp->OperIsCopyBlkOp()); + assert(blkOp->AsObj()->GetLayout()->HasGCPtr()); + genCodeForCpObj(blkOp->AsObj()); + return; + } + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitDisableGC(); + } + bool isCopyBlk = blkOp->OperIsCopyBlkOp(); + + switch (blkOp->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindHelper: + if (isCopyBlk) + { + genCodeForCpBlkHelper(blkOp); + } + else + { + genCodeForInitBlkHelper(blkOp); + } + break; + + case GenTreeBlk::BlkOpKindUnroll: + if (isCopyBlk) + { + genCodeForCpBlkUnroll(blkOp); + } + else + { + genCodeForInitBlkUnroll(blkOp); + } + break; + + default: + unreached(); + } + + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitEnableGC(); + } +} + +//------------------------------------------------------------------------ +// genLeaInstruction: Produce code for a GT_LEA node. +// +// Arguments: +// lea - the node +// +void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) +{ + genConsumeOperands(lea); + emitter* emit = GetEmitter(); + emitAttr size = emitTypeSize(lea); + int offset = lea->Offset(); + + // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate: + // tmpReg = indexReg << scale; + // destReg = baseReg + tmpReg; + // destReg = destReg + offset; + // + // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture + // addressing mode instruction. 
Currently we're 'cheating' by producing one or more + // instructions to generate the addressing mode so we need to modify lowering to + // produce LEAs that are a 1:1 relationship to the LOONGARCH64 architecture. + if (lea->Base() && lea->Index()) + { + GenTree* memBase = lea->Base(); + GenTree* index = lea->Index(); + + assert(isPow2(lea->gtScale)); + + regNumber tmpReg; + if (lea->gtScale == 0) + { + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, lea->gtScale); + assert(scale <= 4); + + emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + + if (offset != 0) + { + if (emitter::isValidSimm12(offset)) + { + emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); + } + else + { + regNumber tmpReg2 = lea->GetSingleTempReg(); + + noway_assert(tmpReg2 != index->GetRegNum()); + noway_assert(tmpReg2 != memBase->GetRegNum()); + noway_assert(tmpReg2 != tmpReg); + + // compute the large offset. + emit->emitIns_I_la(EA_PTRSIZE, tmpReg2, offset); + emit->emitIns_R_R_R(INS_add_d, size, tmpReg, tmpReg, tmpReg2); + } + } + + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + } + else if (lea->Base()) + { + GenTree* memBase = lea->Base(); + + if (emitter::isValidSimm12(offset)) + { + if (offset != 0) + { + // Then compute target reg from [memBase + offset] + emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), memBase->GetRegNum(), offset); + } + else // offset is zero + { + if (lea->GetRegNum() != memBase->GetRegNum()) + { + emit->emitIns_R_R_I(INS_ori, size, lea->GetRegNum(), memBase->GetRegNum(), 0); + } + } + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = lea->GetSingleTempReg(); + + // First load tmpReg with the large offset constant + emit->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + + // Then compute target reg from [memBase + tmpReg] + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + } + } + else if (lea->Index()) + { + // If we encounter a GT_LEA node without a base it means it came out + // when attempting to optimize an arbitrary arithmetic expression during lower. + // This is currently disabled in LOONGARCH64 since we need to adjust lower to account + // for the simpler instructions LOONGARCH64 supports. + // TODO-LOONGARCH64-CQ: Fix this and let LEA optimize arithmetic trees too. + assert(!"We shouldn't see a baseless address computation during CodeGen for LOONGARCH64"); + } + + genProduceReg(lea); +} + +//------------------------------------------------------------------------ +// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. +// +// Arguments: +// delta - the offset to add to the current stack pointer to establish the frame pointer +// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. 
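+//
+// Notes:
+//    A sketch of the emitted code: a zero delta becomes a simple register move of SP into FP,
+//    while a non-zero delta (asserted to fit in a signed 12-bit immediate) becomes
+//    `addi.d fp, sp, delta`.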
+ +void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) +{ + assert(compiler->compGeneratingProlog); + + if (delta == 0) + { + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + } + else + { + assert(emitter::isValidSimm12(delta)); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); + } + + if (reportUnwindData) + { + compiler->unwindSetFrameReg(REG_FPBASE, delta); + } +} + +//------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP. +// +// Notes: +// On LOONGARCH64, this only does the probing; allocating the frame is done when callee-saved registers are saved. +// This is done before anything has been pushed. The previous frame might have a large outgoing argument +// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might +// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, +// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard +// page by default, so we need to be more careful. We do an extra probe if we might not have probed +// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this +// on Windows as well just to be consistent, even though it should not be necessary. +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + assert(compiler->compGeneratingProlog); + + if (frameSize == 0) + { + return; + } + + const target_size_t pageSize = compiler->eeGetPageSize(); + + // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and + // if the next action on the stack might subtract from SP first, before touching the current SP, then + // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does + // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64, + // but we don't alter SP. + target_size_t lastTouchDelta = 0; + + assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); + + if (frameSize < pageSize) + { + lastTouchDelta = frameSize; + } + else if (frameSize < 3 * pageSize) + { + // We don't need a register for the target of the dummy load + // ld_w $0,offset(base) will ignor the addr-exception. + regNumber rTemp = REG_R0; + lastTouchDelta = frameSize; + + for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize) + { + // Generate: + // lw rTemp, -probeOffset(SP) // load into initReg + GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)probeOffset); + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, initReg); + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + + lastTouchDelta -= pageSize; + } + + assert(lastTouchDelta == frameSize % pageSize); + compiler->unwindPadding(); + } + else + { + assert(frameSize >= 3 * pageSize); + + // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change + // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl + // the stack afterward (which means the stack pointer needs to be known). + // + // LOONGARCH64 needs 2 registers. 
See VERY_LARGE_FRAME_SIZE_REG_MASK for how these + // are reserved. + + regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); + availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg + + regNumber rOffset = initReg; + regNumber rLimit; + regMaskTP tempMask; + + // We don't need a register for the target of the dummy load + // ld_w $0,offset(base) will ignor the addr-exception. + regNumber rTemp = REG_R0; + + // We pick the next lowest register number for rLimit + noway_assert(availMask != RBM_NONE); + tempMask = genFindLowestBit(availMask); + rLimit = genRegNumFromMask(tempMask); + availMask &= ~tempMask; + + // Generate: + // + // instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize); + // instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize); + // INS_lu12i_w, REG_R21, pageSize >> 12 + // + // loop: + // ldx_w rTemp, sp, rOffset, + // sub_d rOffset, rOffset, REG_R21 + // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this + // rOffset. + + noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int + + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, rOffset, -(ssize_t)pageSize >> 12); + regSet.verifyRegUsed(rOffset); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, rLimit, -(ssize_t)frameSize); + regSet.verifyRegUsed(rLimit); + + assert(!(pageSize & 0xfff)); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, pageSize >> 12); + + // There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic + // `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions. + + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, rOffset); + GetEmitter()->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, rOffset, rOffset, REG_R21); + + assert(REG_R21 != rLimit); + assert(REG_R21 != rOffset); + ssize_t imm = -2 << 2; + GetEmitter()->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, imm); + + *pInitRegZeroed = false; // The initReg does not contain zero + + compiler->unwindPadding(); + + lastTouchDelta = frameSize % pageSize; + } + + if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + { + + assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)frameSize); + GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, REG_R0, REG_SPBASE, initReg); + compiler->unwindPadding(); + + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + } +} + +inline void CodeGen::genJumpToThrowHlpBlk_la( + SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) +{ + assert(INS_beq <= ins && ins <= INS_bgeu); + + bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks(); + + emitter* emit = GetEmitter(); + if (useThrowHlpBlk) + { + // For code with throw helper blocks, find and use the helper block for + // raising the exception. The block may be shared by other trees too. + + BasicBlock* excpRaisingBlock; + + if (failBlk != nullptr) + { + // We already know which block to jump to. Use that. 
+ excpRaisingBlock = failBlk; + +#ifdef DEBUG + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + assert(excpRaisingBlock == add->acdDstBlk); +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS +#endif // DEBUG + } + else + { + // Find the helper-block which raises the exception. + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + excpRaisingBlock = add->acdDstBlk; +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS + } + + noway_assert(excpRaisingBlock != nullptr); + + // Jump to the exception-throwing block on error. + emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; + } + else + { + // The code to throw the exception will be generated inline, and + // we will jump around it in the normal non-exception case. + + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + // maybe optimize + // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); + if (ins == INS_blt) + { + ins = INS_bge; + } + else if (ins == INS_bltu) + { + ins = INS_bgeu; + } + else if (ins == INS_bge) + { + ins = INS_blt; + } + else if (ins == INS_bgeu) + { + ins = INS_bltu; + } + else + { + ins = ins == INS_beq ? INS_bne : INS_beq; + } + + if (addr == nullptr) + { + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl. + + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + if (compiler->opts.compReloc) + { + ssize_t imm = (2 + 1) << 2; // , 1=jirl. + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + ssize_t imm = (3 + 1) << 2; // , 1=jirl. 
+ emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + ((ssize_t)pAddr & 0xfff) >> 2); + } + } + else + { // INS_OPTS_C + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + + ssize_t imm = 5 << 2; + if (compiler->opts.compReloc) + { + imm = 3 << 2; + } + + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + } + + emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); + regSet.verifyRegistersUsed(killMask); + } +} + +//----------------------------------------------------------------------------------- +// instGen_MemoryBarrier: Emit a MemoryBarrier instruction +// +// Arguments: +// barrierKind - kind of barrier to emit (Only supports the Full now!! This depends on the CPU). +// +// Notes: +// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1 +// +void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) +{ +#ifdef DEBUG + if (JitConfig.JitNoMemoryBarriers() == 1) + { + return; + } +#endif // DEBUG + + // TODO-LOONGARCH64: Use the exact barrier type depending on the CPU. + GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. 
+ if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +/*----------------------------------------------------------------------------- + * + * Push/Pop any callee-saved registers we have used + */ +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On LA we push the FP (frame-pointer) here along with all other callee saved registers + if (isFramePointerUsed()) + { + rsPushRegs |= RBM_FPBASE; + } + + // + // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require + // changes in GC suspension architecture. + // + // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we + // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf + // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends + // on the return address to be saved on the stack. If we skipped pushing/popping ra, the return address would never + // be saved on the stack and the GC suspension would time out. + // + // So if we wanted to skip pushing/popping ra for leaf frames, we would also need to do one of + // the following to make GC suspension work in the above scenario: + // - Make return address hijacking work even when ra is not saved on the stack. + // - Generate fully interruptible code for loops that contains calls + // - Generate fully interruptible code for leaf methods + // + // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity + // is not worth it. + // + + rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). + regSet.rsMaskCalleeSaved = rsPushRegs; + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or + // requirements and + // options. Case numbers in comments here refer to this document. See also Compiler::lvaAssignFrameOffsets() + // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) + // for pictures of the funclet frame layouts. + // + // For most frames, generate, e.g.: + // sdc1 f31, off+7*8(sp) + // ... + // sdc1 f24, off(sp) + // + // sd s7, off2+7*8(sp) + // ... + // sd s1, off2+8(sp) + // sd s0, off2(sp) + // + // sd fp, 0(sp) + // sd ra, 8(sp) + // + // Notes: + // 1. FP is always saved, and the first store is FP, RA. + // 2. 
General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. + // TODO-LOONGARCH64: supporting SIMD feature ! + // 3. For frames with varargs, not implemented completely and not tested ! + // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). + // + // For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top + // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same + // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. + // + // The frames look like the following (simplified to only include components that matter for establishing the + // frames). See also Compiler::lvaAssignFrameOffsets(). + // + // + // Frames with FP, RA saved at bottom of frame (above outgoing argument space): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + // + // Frames with FP, RA saved at top of frame (note: above all callee-saved regs): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + + int totalFrameSize = genTotalFrameSize(); + + int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. + +#ifdef DEBUG + if (verbose) + { + printf("Save float regs: "); + dspRegMask(maskSaveRegsFloat); + printf("\n"); + printf("Save int regs: "); + dspRegMask(maskSaveRegsInt); + printf("\n"); + } +#endif // DEBUG + + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we + // generate based on various sizes. 
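+    //
+    // In outline, the common small-frame prolog (the frameType==1 case below) is:
+    //      addi.d  sp, sp, -framesz       // allocate the whole frame
+    //      st.d    fp, outsz(sp)          // save FP just above the outgoing argument area
+    //      st.d    ra, outsz+8(sp)
+    //      st.d    s0, ...                // save the remaining callee-saved registers
+    //      addi.d  fp, sp, outsz          // establish the frame pointer chain
+    // where outsz is lvaOutgoingArgSpaceSize and framesz is the total frame size computed below.
+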
+    int frameType = 0;
+
+    // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the
+    // first save instruction as a "predecrement" amount, if possible.
+    int calleeSaveSPDelta = 0;
+
+    // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.)
+    bool establishFramePointer = true;
+
+    // If we do establish the frame pointer, what is the amount we add to SP to do so?
+    unsigned offsetSpToSavedFp = 0;
+
+    if (isFramePointerUsed())
+    {
+        // We need to save both FP and RA.
+
+        assert((maskSaveRegsInt & RBM_FP) != 0);
+        assert((maskSaveRegsInt & RBM_RA) != 0);
+
+        // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
+        // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses,
+        // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
+        // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
+        // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above
+        // the GS cookie.
+        //
+        // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
+        // create a frame pointer chain.
+        //
+
+        if (totalFrameSize < 2048)
+        {
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize);
+            compiler->unwindAllocStack(totalFrameSize);
+
+            // Case #1.
+            //
+            // Generate:
+            //      addi.d  sp, sp, -framesz
+            //      st.d    fp, outsz(sp)
+            //      st.d    ra, outsz+8(sp)
+            //
+            // The (totalFrameSize < 2048) condition ensures that the st.d/ld.d offsets fit in a signed 12-bit
+            // immediate.
+            //
+            // After saving the callee-saved registers, we establish the frame pointer with:
+            //      addi.d  fp, sp, offset-fp
+            // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+            JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+            frameType = 1;
+
+            offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
+
+            GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp);
+            compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp);
+
+            GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8);
+            compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8);
+
+            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
+
+            offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA
+        }
+        else
+        {
+            JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+            frameType = 2;
+
+            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
+
+            offset            = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
+            calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
+            offset            = calleeSaveSPDelta - offset;
+        }
+    }
+    else
+    {
+        // No frame pointer (no chaining).
+        assert((maskSaveRegsInt & RBM_FP) == 0);
+        assert((maskSaveRegsInt & RBM_RA) != 0);
+
+        // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using
+        // 'st.d' if we only have one callee-saved register plus RA to save.
+ + NYI("Frame without frame pointer"); + offset = 0; + } + + assert(frameType != 0); + + JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); + + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + if (compiler->info.compIsVarArgs) + { + JITDUMP(" compIsVarArgs=true\n"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); + } +#endif + if (frameType == 1) + { + // offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 2) + { + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offsetSpToSavedFp = offset; + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + + calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + } + + establishFramePointer = false; + } + else + { + unreached(); + } + + if (establishFramePointer) + { + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + } +} + +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) +{ + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) + { + rsRestoreRegs |= RBM_FPBASE; + } + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. + int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. 
+ + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 2; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); + } + } + } + else + { + // No frame pointer (no chaining). 
+ NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + // had done. + } + else + { + unreached(); + } +} + +void CodeGen::genFnPrologCalleeRegArgs() +{ + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + + regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnPrologCalleeRegArgs() LOONGARCH64:0x%llx.\n", regArgMaskLive); + } +#endif + + // We should be generating the prolog block when we are called + assert(compiler->compGeneratingProlog); + + // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. + noway_assert(regArgMaskLive != 0); + + unsigned varNum; + unsigned regArgsVars[MAX_REG_ARG * 2] = {0}; + unsigned regArgNum = 0; + for (varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Is this variable a register arg? + if (!varDsc->lvIsParam) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + continue; + } + + if (varDsc->lvIsInReg()) + { + assert(genIsValidIntReg(varDsc->GetArgReg()) || genIsValidFloatReg(varDsc->GetArgReg())); + assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); + if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) + { + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, + varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + regArgsVars[regArgNum] = varNum; + regArgNum++; + } + } + else + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); +#ifdef USING_SCOPE_INFO + psiMoveToReg(varNum); +#endif // USING_SCOPE_INFO + if (!varDsc->lvLiveInOutOfHndlr) + { + continue; + } + } + + // When we have a promoted struct we have two possible LclVars that can represent the incoming argument + // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. + // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise + // use the the original TYP_STRUCT argument. + // + if (varDsc->lvPromoted || varDsc->lvIsStructField) + { + assert(!"-------------Should confirm on Loongarch!"); + } + + var_types storeType = TYP_UNDEF; + unsigned slotSize = TARGET_POINTER_SIZE; + + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetArgReg())) + { + storeType = varDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; + } + else + { + assert(emitter::isGeneralRegister(varDsc->GetArgReg())); + if (varDsc->lvIs4Field1) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(0); + } + } + slotSize = (unsigned)emitActualTypeSize(storeType); + +#if FEATURE_MULTIREG_ARGS + // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif + } + else // Not a struct type + { + storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + if (emitter::isFloatReg(varDsc->GetArgReg()) != varTypeIsFloating(storeType)) + { + assert(varTypeIsFloating(storeType)); + storeType = storeType == TYP_DOUBLE ? TYP_I_IMPL : TYP_INT; + } + } + emitAttr size = emitActualTypeSize(storeType); + + regNumber srcRegNum = varDsc->GetArgReg(); + + // Stack argument - if the ref count is 0 don't care about it + if (!varDsc->lvOnFrame) + { + noway_assert(varDsc->lvRefCnt() == 0); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetOtherArgReg() < REG_STK) + { + regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } + } + else + { + assert(srcRegNum != varDsc->GetOtherArgReg()); + + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; + + bool FPbased; + int baseOffset = 0; //(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + + base += baseOffset; + + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + assert(tmp_reg == REG_NA); + + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); + } + + regArgMaskLive &= ~genRegMask(srcRegNum); + + // Check if we are writing past the end of the struct + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); + baseOffset = baseOffset < (int)size ? (int)size : baseOffset; + srcRegNum = varDsc->GetOtherArgReg(); + } + else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(slotSize); + if (varDsc->lvIs4Field2) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(1); + } + size = emitActualTypeSize(storeType); + if (baseOffset < (int)EA_SIZE(size)) + { + baseOffset = (int)EA_SIZE(size); + } + srcRegNum = varDsc->GetOtherArgReg(); + } + + if (srcRegNum == varDsc->GetOtherArgReg()) + { + base += baseOffset; + + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. 
+ GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, baseOffset); + } + } + regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! + } + else if (varDsc->lvIsSplit) + { + assert(varDsc->GetArgReg() == REG_ARG_LAST && varDsc->GetOtherArgReg() == REG_STK); + baseOffset = 8; + base += 8; + + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, baseOffset); + } + } + } + } + +#ifdef USING_SCOPE_INFO + { + psiMoveToStack(varNum); + } +#endif // USING_SCOPE_INFO + } + } + + while (regArgNum > 0) + { + varNum = regArgsVars[regArgNum - 1]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) + { + var_types destMemType = varDsc->TypeGet(); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), + varDsc->GetArgReg()); + regArgNum--; + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + for (unsigned i = 0; i < regArgNum; i++) + { + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, + emitActualTypeSize(destMemType)); + regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); + } + break; + } + } + + assert(!regArgMaskLive); +} + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// set to non-zero value after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + unsigned alignment = 0; + unsigned opSize = 0; + + if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp()) + { + opSize = InferStructOpSizeAlign(op, &alignment); + } + else + { + alignment = genTypeAlignments[op->TypeGet()]; + opSize = genTypeSizes[op->TypeGet()]; + } + + assert(opSize != 0); + assert(alignment != 0); + + (*alignmentWB) = alignment; + return opSize; +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + unsigned alignment = 0; + unsigned opSize = 0; + + while (op->gtOper == GT_COMMA) + { + op = op->AsOp()->gtOp2; + } + + if (op->gtOper == GT_OBJ) + { + CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->GetLayout()->GetClassHandle(); + opSize = op->AsObj()->GetLayout()->GetSize(); + alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); + } + else if (op->gtOper == GT_LCL_VAR) + { + const LclVarDsc* varDsc = compiler->lvaGetDesc(op->AsLclVarCommon()); + assert(varDsc->lvType == TYP_STRUCT); + opSize = varDsc->lvSize(); + { + alignment = TARGET_POINTER_SIZE; + } + } + else if (op->gtOper == GT_MKREFANY) + { + opSize = TARGET_POINTER_SIZE * 2; + alignment = TARGET_POINTER_SIZE; + } + else if (op->IsArgPlaceHolderNode()) + { + CORINFO_CLASS_HANDLE clsHnd = op->AsArgPlace()->gtArgPlaceClsHnd; + assert(clsHnd != 0); + opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); + alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); + } + else + { + assert(!"Unhandled gtOper"); + opSize = TARGET_POINTER_SIZE; + alignment = TARGET_POINTER_SIZE; + } + + assert(opSize != 0); + assert(alignment != 0); + + (*alignmentWB) = alignment; + return opSize; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5d7b518aadd4f..b30388047efe0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -537,12 +537,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_SHORT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 3: useType = TYP_INT; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #ifdef TARGET_64BIT case 4: @@ -550,14 +550,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_INT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 5: case 6: case 7: useType = TYP_I_IMPL; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #endif // TARGET_64BIT case TARGET_POINTER_SIZE: @@ -749,10 +749,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } -#elif defined(TARGET_X86) || defined(TARGET_ARM) +#elif 
defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by value on the stack // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI + // On LOONGARCH64 struct that is 1-16 bytes is passed by value in one/two register(s) howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; @@ -776,7 +777,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -901,6 +902,22 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; } +#elif TARGET_LOONGARCH64 + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); + + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + howToReturnStruct = SPK_PrimitiveType; + useType = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT; + } + else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + { + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + } #endif if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && !isNativePrimitiveStructType(clsHnd)) @@ -1043,6 +1060,12 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; +#elif defined(TARGET_LOONGARCH64) + + // On LOONGARCH64 struct that is 1-16 bytes is returned by value in one/two register(s) + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -2218,6 +2241,11 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; + +#elif defined(TARGET_LOONGARCH64) + + info.genCPU = CPU_LOONGARCH64; + #endif // @@ -3888,7 +3916,7 @@ void Compiler::compSetOptimizationLevel() fgCanRelocateEHRegions = true; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Function compRsvdRegCheck: // given a curState to use for calculating the total frame size // it will return true if the REG_OPT_RSVD should be reserved so @@ -3933,6 +3961,10 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) JITDUMP(" Returning true (ARM64)\n\n"); return true; // just always assume we'll need it, for now +#elif defined(TARGET_LOONGARCH64) + JITDUMP(" Returning true (LOONGARCH64)\n\n"); + return true; // just always assume we'll need it, for now + #else // TARGET_ARM // frame layout: @@ -4056,7 +4088,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) return false; #endif // TARGET_ARM } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 //------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4b378ce991ff6..3dc4f23f18d1d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -481,9 +481,15 @@ class LclVarDsc unsigned char 
lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + +#if defined(TARGET_LOONGARCH64) + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. +#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -1014,7 +1020,7 @@ class LclVarDsc } #endif assert(m_layout != nullptr); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && (TypeGet() == TYP_BYREF))); #else assert(varTypeIsStruct(TypeGet())); @@ -1623,7 +1629,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section @@ -1638,7 +1644,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(FEATURE_CFI_SUPPORT) jitstd::vector* cfiCodes; @@ -1669,6 +1675,12 @@ struct fgArgTabEntry unsigned numRegs; // Count of number of registers that this argument uses. // Note that on ARM, if we have a double hfa, this reflects the number // of DOUBLE registers. +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which has float field(s) and no more than two fields + // may be passed by float register(s). + // e.g `struct {int a; float b;}` passed by an integer register and a float register. + var_types structFloatFieldType[2]; +#endif #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs @@ -2158,7 +2170,7 @@ struct fgArgTabEntry // register numbers. 
void SetMultiRegNums() { -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) if (numRegs == 1) { return; @@ -2179,7 +2191,7 @@ struct fgArgTabEntry argReg = (regNumber)(argReg + regSize); setRegNum(regIndex, argReg); } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -2297,6 +2309,20 @@ class fgArgInfo const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); #endif // UNIX_AMD64_ABI +#if defined(TARGET_LOONGARCH64) + fgArgTabEntry* AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber nextOtherRegNum); +#endif + fgArgTabEntry* AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -4057,7 +4083,7 @@ class Compiler // For ARM64, this is structs larger than 16 bytes that are passed by reference. bool lvaIsImplicitByRefLocal(unsigned varNum) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LclVarDsc* varDsc = lvaGetDesc(varNum); if (varDsc->lvIsImplicitByRef) { @@ -4066,7 +4092,7 @@ class Compiler assert(varTypeIsStruct(varDsc) || (varDsc->lvType == TYP_BYREF)); return true; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return false; } @@ -8324,6 +8350,9 @@ class Compiler #elif defined(TARGET_ARM64) reg = REG_R11; regMask = RBM_R11; +#elif defined(TARGET_LOONGARCH64) + reg = REG_T8; + regMask = RBM_T8; #else #error Unsupported or unset target architecture #endif @@ -8732,6 +8761,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + void unwindNop(); + void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last + // instruction and the current location. + void unwindSaveReg(regNumber reg, int offset); + void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); + void unwindReturn(regNumber reg); +#endif // defined(TARGET_LOONGARCH64) + // // Private "helper" functions for the unwind implementation. // @@ -8817,9 +8855,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; +#elif defined(TARGET_LOONGARCH64) + // TODO: supporting SIMD feature for LoongArch64. 
+ assert(!"unimplemented yet on LA"); + CORINFO_InstructionSet minimumIsa = 0; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 return compOpportunisticallyDependsOn(minimumIsa); #else @@ -10272,6 +10314,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_ARM 0x0300 // The generic ARM CPU #define CPU_ARM64 0x0400 // The generic ARM64 CPU +#define CPU_LOONGARCH64 0x0800 // The generic LOONGARCH64 CPU + unsigned genCPU; // What CPU are we running on // Number of class profile probes in this method @@ -10797,7 +10841,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void compSetProcessor(); void compInitDebuggingInfo(); void compSetOptimizationLevel(); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool compRsvdRegCheck(FrameLayoutState curState); #endif void compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags); @@ -12149,6 +12193,13 @@ const instruction INS_SQRT = INS_fsqrt; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 +const instruction INS_BREAKPOINT = INS_break; +const instruction INS_MULADD = INS_fmadd_d; // NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d; // NOTE: default is double. +#endif // TARGET_LOONGARCH64 + /*****************************************************************************/ extern const BYTE genTypeSizes[]; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index b74234165b03f..f3aeff3c2e54e 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -602,7 +602,7 @@ inline bool isRegParamType(var_types type) #endif // !TARGET_X86 } -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) +#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. @@ -660,7 +660,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ return result; } -#endif // TARGET_AMD64 || TARGET_ARMARCH +#endif // TARGET_AMD64 || TARGET_ARMARCH || TARGET_LOONGARCH64 /*****************************************************************************/ @@ -1108,7 +1108,7 @@ inline GenTreeField* Compiler::gtNewFieldRef(var_types type, CORINFO_FIELD_HANDL LclVarDsc* varDsc = lvaGetDesc(obj->AsUnOp()->gtOp1->AsLclVarCommon()); varDsc->lvFieldAccessed = 1; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // These structs are passed by reference and can easily become global // references if those references are exposed. 
We clear out // address-exposure information for these parameters when they are @@ -1120,7 +1120,7 @@ inline GenTreeField* Compiler::gtNewFieldRef(var_types type, CORINFO_FIELD_HANDL { fieldNode->gtFlags |= GTF_GLOB_REF; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } else { @@ -1837,10 +1837,10 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (doubleWeight && (weight * 2 > weight)) { @@ -3084,6 +3084,8 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) #ifdef TARGET_ARM return regNum - REG_F0; +#elif defined(TARGET_LOONGARCH64) + return regNum - REG_F0; #elif defined(TARGET_ARM64) return regNum - REG_V0; #elif defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index fc354aaffc563..4f0556f1b8ec7 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -444,6 +444,14 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* } } } +#elif defined(TARGET_LOONGARCH64) + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } +// otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index a507cadf36aed..ba73a2f8e09f3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1118,6 +1118,10 @@ void emitter::emitBegFN(bool hasFramePtr emitFirstColdIG = nullptr; emitTotalCodeSize = 0; +#ifdef TARGET_LOONGARCH64 + emitCounts_INS_OPTS_J = 0; +#endif + #if EMITTER_STATS emitTotalIGmcnt++; emitSizeMethod = 0; @@ -1296,6 +1300,13 @@ weight_t emitter::getCurrentBlockWeight() } } +#if defined(TARGET_LOONGARCH64) +void emitter::dispIns(instrDesc* id) +{ + // For LoongArch64 using the emitDisInsName(). + NYI_LOONGARCH64("Not used on LOONGARCH64."); +} +#else void emitter::dispIns(instrDesc* id) { #ifdef DEBUG @@ -1317,6 +1328,7 @@ void emitter::dispIns(instrDesc* id) emitIFcounts[id->idInsFmt()]++; #endif } +#endif void emitter::appendToCurIG(instrDesc* id) { @@ -2305,6 +2317,11 @@ void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi) #ifdef TARGET_AMD64 // doesn't have to be all negative on amd printf("-%04X ... %04X\n", -offsLo, offsHi); +#elif defined(TARGET_LOONGARCH64) + if (offsHi < 0) + printf("-%04X ... -%04X\n", -offsLo, -offsHi); + else + printf("-%04X ... %04X\n", -offsLo, offsHi); #else printf("-%04X ... -%04X\n", -offsLo, -offsHi); assert(offsHi <= 0); @@ -2638,7 +2655,7 @@ const char* emitter::emitLabelString(insGroup* ig) #endif // DEBUG -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Does the argument location point to an IG at the end of a function or funclet? 
// We can ignore the codePos part of the location, since it doesn't affect the @@ -2999,9 +3016,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) Compiler* comp = (Compiler*)context; #if defined(TARGET_ARM) comp->unwindNop(id->idCodeSize()); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) comp->unwindNop(); -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -3015,7 +3032,7 @@ void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp) emitWalkIDs(locFrom, emitGenerateUnwindNop, comp); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(TARGET_ARM) @@ -3402,6 +3419,9 @@ const size_t hexEncodingSize = 19; #elif defined(TARGET_ARM) const size_t basicIndent = 12; const size_t hexEncodingSize = 11; +#elif defined(TARGET_LOONGARCH64) +const size_t basicIndent = 12; +const size_t hexEncodingSize = 19; #endif #ifdef DEBUG @@ -4083,8 +4103,10 @@ void emitter::emitDispCommentForHandle(size_t handle, GenTreeFlags flag) * ARM64 has a small and large encoding for both conditional branch and loading label addresses. * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently * NYI). + * LoongArch64 has an individual implementation for emitJumpDistBind(). */ +#ifndef TARGET_LOONGARCH64 void emitter::emitJumpDistBind() { #ifdef DEBUG @@ -4835,6 +4857,7 @@ void emitter::emitJumpDistBind() emitCheckIGoffsets(); #endif // DEBUG } +#endif #if FEATURE_LOOP_ALIGN @@ -5645,6 +5668,11 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { +#ifdef TARGET_LOONGARCH64 + // TODO-LoongArch64: support idDebugOnlyInfo. + return; +#else + #ifdef DEBUG // We should not be jumping/branching across funclets/functions // Except possibly a 'call' to a finally funclet for a local unwind @@ -5740,6 +5768,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) } } #endif // DEBUG +#endif } /***************************************************************************** @@ -6523,7 +6552,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } -#endif // TARGET_XARCH +#elif defined(TARGET_LOONGARCH64) + + isJccAffectedIns = true; + +#endif // TARGET_LOONGARCH64 // Jcc affected instruction boundaries were printed above; handle other cases here. if (!isJccAffectedIns) @@ -6693,6 +6726,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARM64) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6706,6 +6742,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARMARCH) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. 
+ unreached(); #else #error Unsupported or unset target architecture #endif @@ -8678,7 +8717,7 @@ cnsval_ssize_t emitter::emitGetInsSC(instrDesc* id) int adr = emitComp->lvaFrameAddress(varNum, &FPbased); int dsp = adr + offs; if (id->idIns() == INS_sub) - dsp = -dsp; + dsp = -dsp; #endif return dsp; } @@ -9290,13 +9329,14 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) // This uses and defs RDI and RSI. result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); #endif +#if !defined(TARGET_LOONGARCH64) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -9313,8 +9353,9 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; +#endif // !defined(TARGET_LOONGARCH64) -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: result = RBM_CALLEE_GCTRASH_WRITEBARRIER; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 1571fc00cbafa..60971839bc507 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -590,15 +590,21 @@ class emitter #define INSTR_ENCODED_SIZE 4 static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64: not include SIMD-vector. + static_assert_no_msg(INS_count <= 512); + instruction _idIns : 9; +#else static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // The format for the instruction #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; +#elif defined(TARGET_LOONGARCH64) + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -624,7 +630,16 @@ class emitter return idInsIs(ins) || idInsIs(rest...); } +#if defined(TARGET_LOONGARCH64) insFormat idInsFmt() const + { // not used for LOONGARCH64. + return (insFormat)0; + } + void idInsFmt(insFormat insFmt) + { + } +#else + insFormat idInsFmt() const { return _idInsFmt; } @@ -636,6 +651,7 @@ class emitter assert(insFmt < IF_COUNT); _idInsFmt = insFmt; } +#endif void idSetRelocFlags(emitAttr attr) { @@ -649,6 +665,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits + // loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -656,11 +673,11 @@ class emitter opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32 // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. -#elif defined(TARGET_ARM64) -// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields -#else // ARM - opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 -#endif // ARM +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +/* _idOpSize defined below. 
*/ +#else + opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 +#endif // ARM || TARGET_LOONGARCH64 // On Amd64, this is where the second DWORD begins // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that @@ -708,6 +725,14 @@ class emitter unsigned _idLclVar : 1; // access a local on stack #endif +#ifdef TARGET_LOONGARCH64 + // TODO-LoongArch64: maybe delete on future. + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the + // accessing a local on stack. + unsigned _idLclVar : 1; // access a local on stack. +#endif + #ifdef TARGET_ARM insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits insFlags _idInsFlags : 1; // will this instruction set the flags @@ -721,8 +746,8 @@ class emitter #elif defined(TARGET_ARM64) // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) -#elif defined(TARGET_XARCH) - // For xarch, we have used 14 bits from the second DWORD. +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) + // For xarch and LoongArch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -734,6 +759,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits + // loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -746,6 +772,7 @@ class emitter // amd64: 48 bits // arm: 50 bits // arm64: 51 bits + // loongarch64: 48 bits CLANG_FORMAT_COMMENT_ANCHOR; #define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS) @@ -823,7 +850,7 @@ class emitter // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts // about reading what we think is here, to avoid unexpected corruption issues. -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) emitLclVarAddr iiaLclVar; #endif BasicBlock* iiaBBlabel; @@ -877,7 +904,38 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#endif // defined(TARGET_XARCH) +#elif defined(TARGET_LOONGARCH64) + struct + { + unsigned int iiaEncodedInstr; // instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; + }; + + struct + { + int iiaJmpOffset; // temporary saving the offset of jmp or data. + emitLclVarAddr iiaLclVar; + }; + + void iiaSetInstrEncode(unsigned int encode) + { + iiaEncodedInstr = encode; + } + unsigned int iiaGetInstrEncode() const + { + return iiaEncodedInstr; + } + + void iiaSetJmpOffset(int offset) + { + iiaJmpOffset = offset; + } + int iiaGetJmpOffset() const + { + return iiaJmpOffset; + } +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -977,7 +1035,20 @@ class emitter _idInsFlags = sf; assert(sf == _idInsFlags); } -#endif // TARGET_ARM + +#elif defined(TARGET_LOONGARCH64) + unsigned idCodeSize() const + { + return _idCodeSize; + } + void idCodeSize(unsigned sz) + { + // LoongArch64's instrDesc is not always meaning only one instruction. + // e.g. the `emitter::emitIns_I_la` for emitting the immediates. 
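+        // Every LoongArch64 instruction is 4 bytes, so one instrDesc covers at most four instructions
+        // (e.g. a 64-bit immediate materialized by emitIns_I_la); hence the limit of 16 bytes below.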
+ assert(sz <= 16); + _idCodeSize = sz; + } +#endif // TARGET_LOONGARCH64 emitAttr idOpSize() { @@ -1102,6 +1173,42 @@ class emitter #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + insOpts idInsOpt() const + { + return (insOpts)_idInsOpt; + } + void idInsOpt(insOpts opt) + { + _idInsOpt = opt; + assert(opt == _idInsOpt); + } + + regNumber idReg3() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg3; + } + void idReg3(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg3 = reg; + assert(reg == idAddr()->_idReg3); + } + regNumber idReg4() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg4; + } + void idReg4(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg4 = reg; + assert(reg == idAddr()->_idReg4); + } + +#endif // TARGET_LOONGARCH64 + inline static bool fitsInSmallCns(ssize_t val) { return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS)); @@ -1190,6 +1297,17 @@ class emitter } #endif // defined(TARGET_ARM) +#ifdef TARGET_LOONGARCH64 + bool idIsLclVar() const + { + return _idLclVar != 0; + } + void idSetIsLclVar() + { + _idLclVar = 1; + } +#endif // TARGET_LOONGARCH64 + bool idIsCnsReloc() const { return _idCnsReloc != 0; @@ -1340,6 +1458,23 @@ class emitter #define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C +#elif defined(TARGET_LOONGARCH64) +// a read,write or modify from stack location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C + +// a read, write or modify from constant location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C + +// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache +// plus an extra cost (of 1.0) for a increased chance of a cache miss +#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C +#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C + #endif // TARGET_XXX // Make this an enum: @@ -1750,6 +1885,10 @@ class emitter #endif // defined(TARGET_X86) #endif // !defined(HOST_64BIT) +#ifdef TARGET_LOONGARCH64 + unsigned int emitCounts_INS_OPTS_J; +#endif // TARGET_LOONGARCH64 + size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -1815,7 +1954,7 @@ class emitter // CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // ARM32 and ARM64 both can require a bigger prolog instruction group. One scenario is where // a function uses all the incoming integer and single-precision floating-point arguments, // and must store them all to the frame on entry. If the frame is very large, we generate @@ -1829,9 +1968,10 @@ class emitter // ldr w8, [fp, xip1] // [V10 arg10] // which eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (200 * sizeof(emitter::instrDesc)) -#else // !TARGET_ARMARCH + +#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_ARMARCH +#endif // !(TARGET_ARMARCH || TARGET_LOONGARCH64) size_t emitIGbuffSize; @@ -2013,7 +2153,7 @@ class emitter const char* emitLabelString(insGroup* ig); #endif -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); @@ -2027,7 +2167,7 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2197,8 +2337,10 @@ class emitter static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind); #ifdef DEBUG +#ifndef TARGET_LOONGARCH64 void emitInsSanityCheck(instrDesc* id); #endif +#endif #ifdef TARGET_ARMARCH // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC @@ -2218,7 +2360,10 @@ class emitter // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair. bool emitInsWritesToLclVarStackLocPair(instrDesc* id); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + bool emitInsMayWriteToGCReg(instruction ins); + bool emitInsWritesToLclVarStackLoc(instrDesc* id); +#endif // TARGET_LOONGARCH64 /************************************************************************/ /* The following is used to distinguish helper vs non-helper calls */ diff --git a/src/coreclr/jit/emitdef.h b/src/coreclr/jit/emitdef.h index c9f003ccce1b6..35b46314a1225 100644 --- a/src/coreclr/jit/emitdef.h +++ b/src/coreclr/jit/emitdef.h @@ -12,6 +12,8 @@ #include "emitarm.h" #elif defined(TARGET_ARM64) #include "emitarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitloongarch64.h" #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/jit/emitfmts.h b/src/coreclr/jit/emitfmts.h index c252c0b1237d3..77712ed95cce3 100644 --- a/src/coreclr/jit/emitfmts.h +++ b/src/coreclr/jit/emitfmts.h @@ -8,6 +8,8 @@ #include "emitfmtsarm.h" #elif defined(TARGET_ARM64) #include "emitfmtsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitfmtsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h new file mode 100644 index 0000000000000..3dab2b7dc2704 --- /dev/null +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +////////////////////////////////////////////////////////////////////////////// +// define this file for LoongArch64 just for avoiding compiling errors. +// This is moot right now. 
+ +// clang-format off +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifdef DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +enum ID_OPS +{ + ID_OP_NONE, // no additional arguments +}; + +#undef DEFINE_ID_OPS + +////////////////////////////////////////////////////////////////////////////// +#else // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#ifndef IF_DEF +#error Must define IF_DEF macro before including this file +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// enum insFormat instruction enum ID_OPS +// scheduling +// (unused) +////////////////////////////////////////////////////////////////////////////// + +IF_DEF(NONE, IS_NONE, NONE) + +////////////////////////////////////////////////////////////////////////////// +#undef IF_DEF +////////////////////////////////////////////////////////////////////////////// + +#endif // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// +// clang-format on diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 484eca3399b4e..82c78299efebd 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -335,6 +335,36 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) id->idReg2((regNumber)encodeMask); // Save in idReg2 +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = 0; + + if ((regmask & RBM_S0) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S1) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S2) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S3) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S4) != RBM_NONE) + encodeMask |= 0x10; + + id->idReg1((regNumber)encodeMask); // Save in idReg1 + + encodeMask = 0; + + if ((regmask & RBM_S5) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S6) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S7) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S8) != RBM_NONE) + encodeMask |= 0x08; + + id->idReg2((regNumber)encodeMask); // Save in idReg2 + #else NYI("unknown target"); #endif @@ -447,6 +477,32 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) if ((encodeMask & 0x10) != 0) regmask |= RBM_R28; +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = id->idReg1(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S0; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S1; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S2; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S3; + if ((encodeMask & 0x10) != 0) + regmask |= RBM_S4; + + encodeMask = id->idReg2(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S5; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S6; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S7; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S8; + #else NYI("unknown target"); #endif diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 4ed340302119d..cd10727f6eec3 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -46,6 +46,13 @@ JMP_SMALL(lt , ge , blt ) // LT JMP_SMALL(gt , le , bgt ) // GT JMP_SMALL(le , gt , ble ) // LE +#elif defined(TARGET_LOONGARCH64) + +// TODO-LOONGARCH64: adding other condition branches. 
+JMP_SMALL(jmp , jmp , b ) +JMP_SMALL(eq , ne , beq ) // EQ +JMP_SMALL(ne , eq , bne ) // NE + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp new file mode 100644 index 0000000000000..9fb3e1f9cac1c --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -0,0 +1,6781 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX emitloongarch64.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +/*****************************************************************************/ +/*****************************************************************************/ + +#include "instr.h" +#include "emit.h" +#include "codegen.h" + +////These are used for loongarch64 instrs's dump. +////LA_OP_2R opcode: bit31 ~ bit10 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 +#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define 
LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 + +////LA_OP_3R opcode: bit31 ~ bit15 +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 
0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff + +////LA_OP_4R opcode: bit31 ~ bit20 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 + +////LA_OP_2RI8 + +////LA_OP_2RI12 opcode: bit31 ~ bit22 +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf + +////LA_OP_2RI14i opcode: bit31 ~ bit24 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 + +////LA_OP_2RI16 opcode: bit31 ~ bit26 +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b + +////LA_OP_1RI20 opcode: bit31 ~ bit25 +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf + +////LA_OP_I26 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 + +////LA_OP_1RI21 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 + +////other +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define 
LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 + +//// add other define-macro here. + +/*****************************************************************************/ + +const instruction emitJumpKindInstructions[] = { + INS_nop, + +#define JMP_SMALL(en, rev, ins) INS_##ins, +#include "emitjmps.h" +}; + +const emitJumpKind emitReverseJumpKinds[] = { + EJ_NONE, + +#define JMP_SMALL(en, rev, ins) EJ_##rev, +#include "emitjmps.h" +}; + +/***************************************************************************** + * Look up the instruction for a jump kind + */ + +/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) +{ + assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); + return emitJumpKindInstructions[jumpKind]; +} + +/***************************************************************************** +* Look up the jump kind for an instruction. It better be a conditional +* branch instruction with a jump kind! +*/ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ + NYI_LOONGARCH64("emitInsToJumpKind-----unimplemented on LOONGARCH64 yet----"); + return EJ_NONE; +} + +/***************************************************************************** + * Reverse the conditional jump + */ + +/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) +{ + assert(jumpKind < EJ_COUNT); + return emitReverseJumpKinds[jumpKind]; +} + +/***************************************************************************** + * + * Return the allocated size (in bytes) of the given instruction descriptor. + */ + +size_t emitter::emitSizeOfInsDsc(instrDesc* id) +{ + if (emitIsScnsInsDsc(id)) + return SMALL_IDSC_SIZE; + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_JIRL: + case INS_OPTS_J_cond: + case INS_OPTS_J: + return sizeof(instrDescJmp); + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + return sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + return sizeof(instrDesc); + } + + case INS_OPTS_I: + case INS_OPTS_RC: + case INS_OPTS_RL: + case INS_OPTS_RELOC: + case INS_OPTS_NONE: + return sizeof(instrDesc); + default: + NO_WAY("unexpected instruction descriptor format"); + break; + } +} + +inline bool emitter::emitInsMayWriteToGCReg(instruction ins) +{ + assert(ins != INS_invalid); + // NOTE: please reference the file "instrsloongarch64.h" for details !!! + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; +} + +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. 
+ + switch (ins) + { + case INS_st_d: + case INS_st_w: + case INS_st_b: + case INS_st_h: + case INS_stptr_d: + case INS_stx_d: + case INS_stx_w: + case INS_stx_b: + case INS_stx_h: + return true; + + default: + return false; + } +} + +#define LD 1 +#define ST 2 + +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INST(id, nm, info, e1) info, + #include "instrs.h" +}; +// clang-format on + +//------------------------------------------------------------------------ +// emitInsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} + +//------------------------------------------------------------------------ +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} + +//------------------------------------------------------------------------- +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. +// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} + +#undef LD +#undef ST + +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction. + */ + +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) +{ + code_t code = BAD_CODE; + + // clang-format off + const static code_t insCode[] = + { + #define INST(id, nm, info, e1) e1, + #include "instrs.h" + }; + // clang-format on + + code = insCode[ins]; + + assert((code != BAD_CODE)); + + return code; +} + +/**************************************************************************** + * + * Add an instruction with no operands. + */ + +void emitter::emitIns(instruction ins) +{ + // instrDesc* id = emitNewInstrSmall(EA_8BYTE); + instrDesc* id = emitNewInstr(EA_8BYTE); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. + * For referencing a stack-based local variable and a register + * + * Special notes for LoongArch64: + * The parameter `offs` has special info. + * The real value of `offs` is positive. + * If the `offs` is negtive which its real value abs(offs), + * the negtive `offs` is special for optimizing the large offset which >2047. + * when offs >2047 we can't encode one instruction to load/store the data, + * if there are several load/store at this case, you have to repeat the similar + * large offs with reduntant instructions and maybe eat up the `SC_IG_BUFFER_SIZE`. 
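 * (Concrete example of the split performed in the code below: for imm = 0x1900, bit 11
 * is folded into the high part first (0x1900 + 0x800 = 0x2100), lu12i.w materializes
 * 0x2000 into the scratch register, and the final load/store then uses the signed
 * 12-bit displacement -0x700, since 0x2000 - 0x700 == 0x1900.)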
+ * + * Optimize the following: + * lu12i.w x0, 0x0 + * ori x0, x0, 0x9ac + * add.d x0, x0, fp + * fst.s fa0, x0, 0 + * + * For the offs within range [0,0x7ff], using one instruction: + * ori x0, x0, offs + * For the offs within range [0x1000,0xffffffff], using two instruction + * lu12i.w x0, offs-hi-20bits + * ori x0, x0, offs-low-12bits + * + * Store/Load the data: + * fstx.s fa0, x0, fp + * + * If the store/load are repeated, + * addi_d x0,x0,sizeof(type) + * fstx.s fa0, x0, fp + * + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + ssize_t imm; + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_st_b: + case INS_st_h: + + case INS_st_w: + case INS_fst_s: + + case INS_st_d: + case INS_fst_d: + break; + + default: + NYI("emitIns_S_R"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs - 8 : offs; + + if ((-2048 <= imm) && (imm < 2048)) + { + // regs[1] = reg2; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm = imm3 ? imm2 - imm3 : imm2; + + reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idReg2(reg2); + + id->idIns(ins); + + code_t code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (code_t)(imm & 0xfff) << 10; + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/* + * Special notes for `offs`, please see the comment for `emitter::emitIns_S_R`. + */ +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + ssize_t imm; + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_ld_b: + case INS_ld_bu: + + case INS_ld_h: + case INS_ld_hu: + + case INS_ld_w: + case INS_ld_wu: + case INS_fld_s: + + case INS_ld_d: + case INS_fld_d: + + break; + + case INS_lea: + assert(size == EA_8BYTE); + break; + + default: + NYI("emitIns_R_S"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; + + reg1 = (regNumber)((char)reg1 & 0x1f); + code_t code; + if ((-2048 <= imm) && (imm < 2048)) + { + if (ins == INS_lea) + { + ins = INS_addi_d; + } + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (imm & 0xfff) << 10; + } + else + { + if (ins == INS_lea) + { + assert(isValidSimm20(imm >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm >> 12); + ssize_t imm2 = imm & 0xfff; + emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); + + ins = INS_add_d; + code = emitInsCode(ins); + code |= (code_t)reg1; + code |= (code_t)reg2 << 5; + code |= (code_t)REG_RA << 10; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm3 = imm3 ? imm2 - imm3 : imm2; + code = emitInsCode(ins); + code |= (code_t)reg1; + code |= (code_t)REG_RA << 5; + code |= (code_t)(imm3 & 0xfff) << 10; + } + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idIns(ins); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a single immediate value. + */ + +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_b: + case INS_bl: + assert(!(imm & 0x3)); + code |= ((imm >> 18) & 0x3ff); // offs[25:16] + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] + break; + case INS_dbar: + case INS_ibar: + assert((0 <= imm) && (imm <= 0x7fff)); + code |= (imm & 0x7fff); // hint + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs) +{ +#ifdef DEBUG + switch (ins) + { + case INS_bceqz: + case INS_bcnez: + break; + + default: + unreached(); + } +#endif + + code_t code = emitInsCode(ins); + + assert(!(offs & 0x3)); + assert(!(cc >> 3)); + code |= ((cc & 0x7) << 5); // cj + code |= ((offs >> 18) & 0x1f); // offs[20:16] + code |= ((offs >> 2) & 0xffff) << 10; // offs[15:0] + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a constant. 
+ */ + +void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_lu12i_w: + case INS_lu32i_d: + case INS_pcaddi: + case INS_pcalau12i: + case INS_pcaddu12i: + case INS_pcaddu18i: + assert(isGeneralRegister(reg)); + assert((-524288 <= imm) && (imm < 524288)); + + code |= reg; // rd + code |= (imm & 0xfffff) << 5; // si20 + break; + case INS_beqz: + case INS_bnez: + assert(isGeneralRegisterOrR0(reg)); + assert(!(imm & 0x3)); + assert((-1048576 <= (imm >> 2)) && ((imm >> 2) <= 1048575)); + + code |= ((imm >> 18) & 0x1f); // offs[20:16] + code |= reg << 5; // rj + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] + break; + case INS_movfr2cf: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f) << 5; // fj + code |= imm; // cc + break; + case INS_movcf2fr: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f); // fd + code |= imm << 5; // cc + break; + case INS_movgr2cf: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg << 5; // rj + code |= imm; // cc + break; + case INS_movcf2gr: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg; // rd + code |= imm << 5; // cc + break; + default: + unreached(); + break; + } // end switch (ins) + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) +{ + assert(IsMovInstruction(ins)); + + if (!canSkip || (dstReg != srcReg)) + { + if ((EA_4BYTE == attr) && (INS_mov == ins)) + emitIns_R_R_I(INS_slli_w, attr, dstReg, srcReg, 0); + else + emitIns_R_R(ins, attr, dstReg, srcReg); + } +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if (INS_mov == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) + { +#ifdef DEBUG + switch (ins) + { + case INS_ext_w_b: + case INS_ext_w_h: + case INS_clo_w: + case INS_clz_w: + case INS_cto_w: + case INS_ctz_w: + case INS_clo_d: + case INS_clz_d: + case INS_cto_d: + case INS_ctz_d: + case INS_revb_2h: + case INS_revb_4h: + case INS_revb_2w: + case INS_revb_d: + case INS_revh_2w: + case INS_revh_d: + case INS_bitrev_4b: + case INS_bitrev_8b: + case INS_bitrev_w: + case INS_bitrev_d: + case INS_rdtimel_w: + case INS_rdtimeh_w: + case INS_rdtime_d: + case INS_cpucfg: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --1!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + 
assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1 << 5; // rj + code |= reg2 << 10; // rk + } + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fabs_s: + case INS_fabs_d: + case INS_fneg_s: + case INS_fneg_d: + case INS_fsqrt_s: + case INS_fsqrt_d: + case INS_frsqrt_s: + case INS_frsqrt_d: + case INS_frecip_s: + case INS_frecip_d: + case INS_flogb_s: + case INS_flogb_d: + case INS_fclass_s: + case INS_fclass_d: + case INS_fcvt_s_d: + case INS_fcvt_d_s: + case INS_ffint_s_w: + case INS_ffint_s_l: + case INS_ffint_d_w: + case INS_ffint_d_l: + case INS_ftint_w_s: + case INS_ftint_w_d: + case INS_ftint_l_s: + case INS_ftint_l_d: + case INS_ftintrm_w_s: + case INS_ftintrm_w_d: + case INS_ftintrm_l_s: + case INS_ftintrm_l_d: + case INS_ftintrp_w_s: + case INS_ftintrp_w_d: + case INS_ftintrp_l_s: + case INS_ftintrp_l_d: + case INS_ftintrz_w_s: + case INS_ftintrz_w_d: + case INS_ftintrz_l_s: + case INS_ftintrz_l_d: + case INS_ftintrne_w_s: + case INS_ftintrne_w_d: + case INS_ftintrne_l_s: + case INS_ftintrne_l_d: + case INS_frint_s: + case INS_frint_d: + case INS_fmov_s: + case INS_fmov_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --2!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + } + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movgr2frh_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --3!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f); // fd + code |= reg2 << 5; // rj + } + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) + { +#ifdef DEBUG + switch (ins) + { + case INS_movfr2gr_s: + case INS_movfr2gr_d: + case INS_movfrh2gr_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= reg1; // rd + code |= (reg2 & 0x1f) << 5; // fj + } + else if ((INS_dneg == ins) || (INS_neg == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + // sub_d rd, zero, rk + // sub_w rd, zero, rk + code |= reg1; // rd + code |= reg2 << 10; // rk + } + else if (INS_not == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + // nor rd, rj, zero + code |= reg1; // rd + code |= reg2 << 5; // rj + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and a constant. 
+ */ + +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_slli_w: + case INS_srli_w: + case INS_srai_w: + case INS_rotri_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --1!"); + } +#endif + + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x1f)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x1f) << 10; // ui5 + } + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_slli_d: + case INS_srli_d: + case INS_srai_d: + case INS_rotri_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --2!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x3f)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3f) << 10; // ui6 + } + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { +#ifdef DEBUG + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_lu52i_d: + case INS_slti: + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_ld_d: + case INS_ld_bu: + case INS_ld_hu: + case INS_ld_wu: + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_st_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --3!"); + } + + assert((-2048 <= imm) && (imm <= 2047)); + } + else if (ins == INS_sltui) + { + assert((0 <= imm) && (imm <= 0x7ff)); + } + else + { + switch (ins) + { + case INS_andi: + case INS_ori: + case INS_xori: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --4!"); + } + assert((0 <= imm) && (imm <= 0xfff)); + } +#endif + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 or ui12 + } + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fld_s: + case INS_fld_d: + case INS_fst_s: + case INS_fst_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --5!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-2048 <= imm) && (imm <= 2047)); + + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 + } + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) + { +#ifdef DEBUG + switch (ins) + { + case INS_ldptr_w: + case INS_ldptr_d: + case INS_ll_w: + case INS_ll_d: + case INS_stptr_w: + case INS_stptr_d: + case INS_sc_w: + case INS_sc_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --6!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-8192 <= imm) && (imm <= 8191)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3fff) << 10; // si14 + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { +#ifdef DEBUG + switch (ins) + { + case INS_beq: + case INS_bne: + case INS_blt: + case INS_bltu: + case INS_bge: + case 
INS_bgeu: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --7!"); + } +#endif + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(!(imm & 0x3)); + assert((-32768 <= (imm >> 2)) && ((imm >> 2) <= 32767)); + + code |= reg1 << 5; // rj + code |= reg2; // rd + code |= ((imm >> 2) & 0xffff) << 10; // offs16 + } + else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fcmp_caf_s: + case INS_fcmp_cun_s: + case INS_fcmp_ceq_s: + case INS_fcmp_cueq_s: + case INS_fcmp_clt_s: + case INS_fcmp_cult_s: + case INS_fcmp_cle_s: + case INS_fcmp_cule_s: + case INS_fcmp_cne_s: + case INS_fcmp_cor_s: + case INS_fcmp_cune_s: + case INS_fcmp_saf_d: + case INS_fcmp_sun_d: + case INS_fcmp_seq_d: + case INS_fcmp_sueq_d: + case INS_fcmp_slt_d: + case INS_fcmp_sult_d: + case INS_fcmp_sle_d: + case INS_fcmp_sule_d: + case INS_fcmp_sne_d: + case INS_fcmp_sor_d: + case INS_fcmp_sune_d: + case INS_fcmp_caf_d: + case INS_fcmp_cun_d: + case INS_fcmp_ceq_d: + case INS_fcmp_cueq_d: + case INS_fcmp_clt_d: + case INS_fcmp_cult_d: + case INS_fcmp_cle_d: + case INS_fcmp_cule_d: + case INS_fcmp_cne_d: + case INS_fcmp_cor_d: + case INS_fcmp_cune_d: + case INS_fcmp_saf_s: + case INS_fcmp_sun_s: + case INS_fcmp_seq_s: + case INS_fcmp_sueq_s: + case INS_fcmp_slt_s: + case INS_fcmp_sult_s: + case INS_fcmp_sle_s: + case INS_fcmp_sule_s: + case INS_fcmp_sne_s: + case INS_fcmp_sor_s: + case INS_fcmp_sune_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --8!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f) << 5; // fj + code |= (reg2 & 0x1f) << 10; // fk + code |= imm & 0x7; // cc + } + else if (INS_addu16i_d == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // si16 + } + else if (INS_jirl == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // offs16 + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. 
+* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +*/ +void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) +{ + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); + + bool immFits = true; + +#ifdef DEBUG + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_ld_d: + immFits = isValidSimm12(imm); + break; + + case INS_andi: + case INS_ori: + case INS_xori: + immFits = (0 <= imm) && (imm <= 0xfff); + break; + + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); + } +#endif + + if (immFits) + { + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + assert(!EA_IS_RELOC(attr)); + emitIns_I_la(attr, reg1, imm); + assert(ins == INS_ld_d); + emitIns_R_R_R(INS_ldx_d, attr, reg1, reg2, reg1); + } +} + +/***************************************************************************** + * + * Add an instruction referencing three registers. + */ + +void emitter::emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */ +{ + code_t code = emitInsCode(ins); + + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || + ((INS_stx_b <= ins) && (ins <= INS_stle_d))) + { +#ifdef DEBUG + switch (ins) + { + case INS_add_w: + case INS_add_d: + case INS_sub_w: + case INS_sub_d: + case INS_and: + case INS_or: + case INS_nor: + case INS_xor: + case INS_andn: + case INS_orn: + + case INS_mul_w: + case INS_mul_d: + case INS_mulh_w: + case INS_mulh_wu: + case INS_mulh_d: + case INS_mulh_du: + case INS_mulw_d_w: + case INS_mulw_d_wu: + case INS_div_w: + case INS_div_wu: + case INS_div_d: + case INS_div_du: + case INS_mod_w: + case INS_mod_wu: + case INS_mod_d: + case INS_mod_du: + + case INS_sll_w: + case INS_srl_w: + case INS_sra_w: + case INS_rotr_w: + case INS_sll_d: + case INS_srl_d: + case INS_sra_d: + case INS_rotr_d: + + case INS_maskeqz: + case INS_masknez: + + case INS_slt: + case INS_sltu: + + case INS_ldx_b: + case INS_ldx_h: + case INS_ldx_w: + case INS_ldx_d: + case INS_ldx_bu: + case INS_ldx_hu: + case INS_ldx_wu: + case INS_stx_b: + case INS_stx_h: + case INS_stx_w: + case INS_stx_d: + + case INS_ldgt_b: + case INS_ldgt_h: + case INS_ldgt_w: + case INS_ldgt_d: + case INS_ldle_b: + case INS_ldle_h: + case INS_ldle_w: + case INS_ldle_d: + case INS_stgt_b: + case INS_stgt_h: + case INS_stgt_w: + case INS_stgt_d: + case INS_stle_b: + case INS_stle_h: + case INS_stle_w: + case INS_stle_d: + + case INS_amswap_w: + case INS_amswap_d: + case INS_amswap_db_w: + case INS_amswap_db_d: + case INS_amadd_w: + case INS_amadd_d: + case INS_amadd_db_w: + case INS_amadd_db_d: + case INS_amand_w: + case INS_amand_d: + case INS_amand_db_w: + case INS_amand_db_d: + case INS_amor_w: + case INS_amor_d: + case INS_amor_db_w: + case INS_amor_db_d: + case INS_amxor_w: + case INS_amxor_d: + case INS_amxor_db_w: + case INS_amxor_db_d: + case INS_ammax_w: + case INS_ammax_d: + case INS_ammax_db_w: + case INS_ammax_db_d: + case INS_ammin_w: + case INS_ammin_d: + case INS_ammin_db_w: + case INS_ammin_db_d: + case INS_ammax_wu: + case INS_ammax_du: + case INS_ammax_db_wu: + case INS_ammax_db_du: + case INS_ammin_wu: + case INS_ammin_du: + case INS_ammin_db_wu: + case INS_ammin_db_du: + + case INS_crc_w_b_w: + case INS_crc_w_h_w: + case INS_crc_w_w_w: + case INS_crc_w_d_w: + case INS_crcc_w_b_w: 
+ case INS_crcc_w_h_w: + case INS_crcc_w_w_w: + case INS_crcc_w_d_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --1!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + code |= (reg3 /*& 0x1f*/) << 10; // rk + } + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fadd_s: + case INS_fadd_d: + case INS_fsub_s: + case INS_fsub_d: + case INS_fmul_s: + case INS_fmul_d: + case INS_fdiv_s: + case INS_fdiv_d: + case INS_fmax_s: + case INS_fmax_d: + case INS_fmin_s: + case INS_fmin_d: + case INS_fmaxa_s: + case INS_fmaxa_d: + case INS_fmina_s: + case INS_fmina_d: + case INS_fscaleb_s: + case INS_fscaleb_d: + case INS_fcopysign_s: + case INS_fcopysign_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --2!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + } + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_fldx_s: + case INS_fldx_d: + case INS_fstx_s: + case INS_fstx_d: + + case INS_fldgt_s: + case INS_fldgt_d: + case INS_fldle_s: + case INS_fldle_d: + case INS_fstgt_s: + case INS_fstgt_d: + case INS_fstle_s: + case INS_fstle_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --3!"); + } +#endif + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + } + else + { + NYI_LOONGARCH64("Unsupported instruction in emitIns_R_R_R"); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers and a constant. 
+ */ + +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + code_t code = emitInsCode(ins); + + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) + { +#ifdef DEBUG + switch (ins) + { + case INS_alsl_w: + case INS_alsl_wu: + case INS_alsl_d: + case INS_bytepick_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 3)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa2 + } + else if (INS_bytepick_d == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa3 + } + else if (INS_fsel == ins) + { + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= imm << 15; // ca + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and two constants. + */ + +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +{ + code_t code = emitInsCode(ins); + + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + switch (ins) + { + case INS_bstrins_w: + case INS_bstrpick_w: + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 32)); + code |= (imm1 & 0x1f) << 16; // msbw + code |= (imm2 & 0x1f) << 10; // lsbw + break; + case INS_bstrins_d: + case INS_bstrpick_d: + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 64)); + code |= (imm1 & 0x3f) << 16; // msbd + code |= (imm2 & 0x3f) << 10; // lsbd + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing four registers. 
+ */ + +void emitter::emitIns_R_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_fmadd_s: + case INS_fmadd_d: + case INS_fmsub_s: + case INS_fmsub_d: + case INS_fnmadd_s: + case INS_fnmadd_d: + case INS_fnmsub_s: + case INS_fnmsub_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert(isFloatReg(reg4)); + + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (reg4 & 0x1f) << 15; // fa + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * For LOONGARCH64, maybe not the best, here just suports the func-interface. + * + */ +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs)); // can optimize. + // assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + // assert(isGeneralRegister(reg)); while load float the reg is FPR. + + // when id->idIns == bl, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // when id->idIns == load-ins, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // load reg, offs_lo-12bits(reg) #when ins is load ins. + // + // INS_OPTS_RC: ins == bl placeholders. 3-ins: // TODO-LoongArch64: maybe optimize. + // lu12i_w reg, addr-hi-20bits + // ori reg, reg, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // INS_OPTS_RC: ins == load. 3-ins: + // lu12i_w at, offs_hi-20bits //NOTE: offs = (int)(offs_hi<<12) + (int)offs_lo + // lu32i_d at, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(reg) #when ins is load ins. + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idSmallCns(offs); // usually is 0. + id->idInsOpt(INS_OPTS_RC); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(12); // TODO-LoongArch64: maybe optimize. + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + // TODO-LoongArch64: this maybe deleted. + id->idSetIsBound(); // We won't patch address since we will know the exact distance + // once JIT code and data are allocated together. + + assert(addrReg == REG_NA); // NOTE: for LOONGARCH64, not support addrReg != REG_NA. + + id->idAddr()->iiaFieldHnd = fldHnd; + + appendToCurIG(id); +} + +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ + NYI_LOONGARCH64("emitIns_R_AR-----unimplemented/unused on LOONGARCH64 yet----"); +} + +// This computes address from the immediate which is relocatable. 
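A minimal standalone sketch (illustrative, not part of this change) of the hi-20/lo-12 split that the pcaddu12i/addi_d and pcaddu12i/ld_d pairs described above and below rely on; the low 12 bits are consumed as a signed immediate, so bit 11 is rounded into the high part first:

    // Assumes values stay well inside the signed 32-bit range and an arithmetic right shift.
    #include <cassert>
    #include <cstdint>

    static void splitImm32(int32_t disp, int32_t* hi20, int32_t* lo12)
    {
        *hi20 = (disp + 0x800) >> 12;     // high 20 bits, rounded so the low part is signed-12
        *lo12 = disp - (*hi20 << 12);     // remaining signed low 12 bits
    }

    int main()
    {
        const int32_t cases[] = {0x1900, -0x1234, 0x7ffff000};
        for (int32_t disp : cases)
        {
            int32_t hi, lo;
            splitImm32(disp, &hi, &lo);
            assert((-0x800 <= lo) && (lo <= 0x7ff));
            assert(((int64_t)hi << 12) + lo == disp); // the two-instruction pair rebuilds disp
        }
        return 0;
    }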
+void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC + assert(ins == INS_bl); // for special. + assert(isGeneralRegister(reg)); + + // INS_OPTS_RELOC: placeholders. 2-ins: + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ld_d reg, reg, off-lo-12bits + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idInsOpt(INS_OPTS_RELOC); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idCodeSize(8); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ + // TODO-LoongArch64: maybe delete it on future. + NYI_LOONGARCH64("emitSetShortJump-----unimplemented/unused on LOONGARCH64 yet----"); +} + +/***************************************************************************** + * + * Add a label instruction. + */ + +void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + assert(dst->bbFlags & BBF_HAS_LABEL); + + // if for reloc! 4-ins: + // pcaddu12i reg, offset-hi20 + // addi_d reg, reg, offset-lo12 + // + // else: 3-ins: + // lu12i_w reg, dst-hi-20bits + // ori reg, reg, dst-lo-12bits + // bstrins_d reg, zero, msbd, lsbd / lu32i_d reg, 0xff + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsOpt(INS_OPTS_RL); + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(12); + + id->idReg1(reg); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + +#ifdef DEBUG + // Mark the catch return + if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) + { + id->idDebugOnlyInfo()->idCatchRet = true; + } +#endif // DEBUG + + appendToCurIG(id); +} + +void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + NYI_LOONGARCH64("emitIns_J_R-----unimplemented/unused on LOONGARCH64 yet----"); +} + +// NOTE: +// For loongarch64, emitIns_J is just only jump, not include the condition branch! +// The condition branch is the emitIns_J_cond_la(). +// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by +// INS_OPTS_JIRL. +// +// The arg "instrCount" is two regs's encoding when ins is beq/bne/blt/bltu/bge/bgeu/beqz/bnez. +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) +{ + if (dst == nullptr) + { // Now this case not used for loongarch64. + assert(instrCount != 0); + assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. 
+ + assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. + emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. + + return; + } + + // + // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL. + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst + + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + id->idIns(ins); + id->idReg1((regNumber)(instrCount & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); + + id->idInsOpt(INS_OPTS_J); + emitCounts_INS_OPTS_J++; + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + } + + id->idjShort = false; + + // TODO-LoongArch64: maybe deleted this. + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +// NOTE: +// For loongarch64, emitIns_J_cond_la() is the condition branch. +// NOTE: Only supported short branch so far !!! +// +void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) +{ + // TODO-LoongArch64: + // Now the emitIns_J_cond_la() is only the short condition branch. + // There is no long condition branch for loongarch64 so far. + // For loongarch64, the long condition branch is like this: + // ---> branch_condition condition_target; //here is the condition branch, short branch is enough. + // ---> jump jump_target; (this supporting the long jump.) + // condition_target: + // ... + // ... + // jump_target: + // + // + // INS_OPTS_J_cond: placeholders. 1-ins. 
+ // ins reg1, reg2, dst + + assert(dst != nullptr); + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idjShort = false; + + id->idInsOpt(INS_OPTS_J_cond); + id->idAddr()->iiaBBlabel = dst; + + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) +{ + assert(!EA_IS_RELOC(size)); + assert(isGeneralRegister(reg)); + // size = EA_SIZE(size); + + if (-1 == (imm >> 11) || 0 == (imm >> 11)) + { + emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm); + return; + } + + if (0 == (imm >> 12)) + { + emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm); + return; + } + + instrDesc* id = emitNewInstr(size); + + if ((imm == INT64_MAX) || (imm == 0xffffffff)) + { + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32); + id->idCodeSize(8); + } + else if (-1 == (imm >> 31) || 0 == (imm >> 31)) + { + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + + id->idCodeSize(8); + } + else if (-1 == (imm >> 51) || 0 == (imm >> 51)) + { + // low-32bits. + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // + // high-20bits. + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + + id->idCodeSize(12); + } + else + { // 0xffff ffff ffff ffff. + // low-32bits. + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // + // high-32bits. + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); + + id->idCodeSize(16); + } + + id->idIns(INS_lu12i_w); + id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); + + id->idInsOpt(INS_OPTS_I); + + id->idAddr()->iiaAddr = (BYTE*)imm; + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a call instruction (direct or indirect). + * argSize<0 means that the caller will pop the arguments + * + * The other arguments are interpreted depending on callType as shown: + * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. + * + * EC_FUNC_TOKEN : addr is the method address + * + * If callType is one of these emitCallTypes, addr has to be NULL. + * EC_INDIR_R : "call ireg". + * + * For LOONGARCH xreg, xmul and disp are never used and should always be 0/REG_NA. + * + * Please consult the "debugger team notification" comment in genFnProlog(). 
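 *
 * As an illustrative (hypothetical, simplified) direct-call use - the argument values are
 * placeholders for what codegen already has at the call site, and the optional trailing
 * parameters plus any second-return-size argument are left at their defaults:
 *
 *   emitIns_Call(EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo)
 *                (void*)addr, 0, EA_PTRSIZE, ptrVars, gcrefRegs, byrefRegs);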
+ */ + +void emitter::emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di /* = DebugInfo() */, + regNumber ireg /* = REG_NA */, + regNumber xreg /* = REG_NA */, + unsigned xmul /* = 0 */, + ssize_t disp /* = 0 */, + bool isJump /* = false */) +{ + /* Sanity check the arguments depending on callType */ + + assert(callType < EC_COUNT); + assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert(callType < EC_INDIR_R || addr == NULL); + assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); + + // LoongArch64 never uses these + assert(xreg == REG_NA && xmul == 0 && disp == 0); + + // Our stack level should be always greater than the bytes of arguments we push. Just + // a sanity test. + assert((unsigned)abs(argSize) <= codeGen->genStackLevel); + + // Trim out any callee-trashed registers from the live set. + regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + gcrefRegs &= savedSet; + byrefRegs &= savedSet; + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); + dumpConvertedVarSet(emitComp, ptrVars); + printf(", gcrefRegs="); + printRegMaskInt(gcrefRegs); + emitDispRegSet(gcrefRegs); + printf(", byrefRegs="); + printRegMaskInt(byrefRegs); + emitDispRegSet(byrefRegs); + printf("\n"); + } +#endif + + /* Managed RetVal: emit sequence point for the call */ + if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + { + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + } + + /* + We need to allocate the appropriate instruction descriptor based + on whether this is a direct/indirect call, and whether we need to + record an updated set of live GC variables. + */ + instrDesc* id; + + assert(argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + + if (callType >= EC_INDIR_R) + { + /* Indirect call, virtual calls */ + + assert(callType == EC_INDIR_R); + + id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + else + { + /* Helper/static/nonvirtual/function calls (direct or through handle), + and calls to an absolute addr. */ + + assert(callType == EC_FUNC_TOKEN); + + id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + + /* Update the emitter's live GC ref sets */ + + VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + emitThisGCrefRegs = gcrefRegs; + emitThisByrefRegs = byrefRegs; + + id->idSetIsNoGC(emitNoGChelper(methHnd)); + + /* Set the instruction - special case jumping a function */ + instruction ins; + + ins = INS_jirl; // jirl t2 + id->idIns(ins); + + id->idInsOpt(INS_OPTS_C); + // TODO-LoongArch64: maybe optimize. + + // INS_OPTS_C: placeholders. 1/2/4-ins: + // if (callType == EC_INDIR_R) + // jirl REG_R0/REG_RA, ireg, 0 <---- 1-ins + // else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR) + // if reloc: + // //pc + offset_38bits # only when reloc. 
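+    //      (pcaddu18i adds (hi20 << 18) to the PC and jirl adds the low 18 bits,
+    //       i.e. a signed 38-bit, roughly +/-128GB, pc-relative reach)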
+ // pcaddu18i t2, addr-hi20 + // jilr r0/1,t2,addr-lo18 + // + // else: + // lu12i_w t2, dst_offset_lo32-hi + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl REG_R0/REG_RA, t2, 0 + + /* Record the address: method, indirection, or funcptr */ + if (callType == EC_INDIR_R) + { + /* This is an indirect call (either a virtual call or func ptr call) */ + // assert(callType == EC_INDIR_R); + + id->idSetIsCallRegPtr(); + + regNumber reg_jirl = isJump ? REG_R0 : REG_RA; + id->idReg4(reg_jirl); + id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. + assert(xreg == REG_NA); + + id->idCodeSize(4); + } + else + { + /* This is a simple direct call: "call helper/method/addr" */ + + assert(callType == EC_FUNC_TOKEN); + assert(addr != NULL); + assert((((size_t)addr) & 3) == 0); + + addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + id->idAddr()->iiaAddr = (BYTE*)addr; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + { + id->idCodeSize(16); + } + } + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + if (id->idIsLargeCall()) + { + printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, + VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); + } + } + + id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + id->idDebugOnlyInfo()->idCallSig = sigInfo; +#endif // DEBUG + +#ifdef LATE_DISASM + if (addr != nullptr) + { + codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + } +#endif // LATE_DISASM + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Output a call instruction. + */ + +unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) +{ + unsigned char callInstrSize = sizeof(code_t); // 4 bytes + regMaskTP gcrefRegs; + regMaskTP byrefRegs; + + VARSET_TP GCvars(VarSetOps::UninitVal()); + + // Is this a "fat" call descriptor? + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + gcrefRegs = idCall->idcGcrefRegs; + byrefRegs = idCall->idcByrefRegs; + VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + + gcrefRegs = emitDecodeCallGCregs(id); + byrefRegs = 0; + VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); + } + + /* We update the GC info before the call as the variables cannot be + used by the call. Killing variables before the call helps with + boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. + If we ever track aliased variables (which could be used by the + call), we would have to keep them alive past the call. */ + + emitUpdateLiveGCvars(GCvars, dst); +#ifdef DEBUG + // NOTEADD: + // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. + if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCVarDelta(); // define in emit.cpp + } +#endif // DEBUG + + assert(id->idIns() == INS_jirl); + if (id->idIsCallRegPtr()) + { // EC_INDIR_R + code = emitInsCode(id->idIns()); + code |= (code_t)id->idReg4(); + code |= (code_t)id->idReg3() << 5; + // the offset default is 0; + *(code_t*)dst = code; + } + else if (id->idIsReloc()) + { + // pc + offset_38bits + // + // pcaddu18i t2, addr-hi20 + // jilr r0/1,t2,addr-lo18 + + *(code_t*)dst = 0x1e00000e; + + size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. 
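+
+        // Bit 0 of the stashed address is the link-register selector that emitIns_Call
+        // folded in (1: link through RA for an ordinary call, 0: R0 for a tail jump);
+        // it is removed again before forming the pc-relative offset below.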
+ + int reg2 = (int)addr & 1; + addr = addr ^ 1; + + assert(isValidSimm38(addr - (ssize_t)dst)); + assert((addr & 3) == 0); + + dst += 4; +#ifdef DEBUG + code = emitInsCode(INS_pcaddu18i); + assert((code | (14)) == 0x1e00000e); + assert((int)REG_T2 == 14); + code = emitInsCode(INS_jirl); + assert(code == 0x4c000000); +#endif + *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; + + emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_LOONGARCH64_JIR); + } + else + { + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 + + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + assert((imm >> 32) == 0xff); + + int reg2 = (int)(imm & 1); + imm -= reg2; + + code = emitInsCode(INS_lu12i_w); + code |= (code_t)REG_T2; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)REG_T2; + code |= (code_t)REG_T2 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)REG_T2; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_jirl); + code |= (code_t)reg2; + code |= (code_t)REG_T2 << 5; + // the offset default is 0; + *(code_t*)dst = code; + } + + dst += 4; + + // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } + + // If the method returns a GC ref, mark INTRET (A0) appropriately. + if (id->idGCref() == GCT_GCREF) + { + gcrefRegs = emitThisGCrefRegs | RBM_INTRET; + } + else if (id->idGCref() == GCT_BYREF) + { + byrefRegs = emitThisByrefRegs | RBM_INTRET; + } + + // If is a multi-register return method is called, mark INTRET_1 (A1) appropriately + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + if (idCall->idSecondGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET_1; + } + else if (idCall->idSecondGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET_1; + } + } + + // If the GC register set has changed, report the new set. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + // If the Byref register set has changed, report the new set. + if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } + + // Some helper calls may be marked as not requiring GC info to be recorded. + if (!id->idIsNoGC()) + { + // On LOONGARCH64, as on AMD64, we don't change the stack pointer to push/pop args. + // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism + // to record the call for GC info purposes. (It might be best to use an alternate call, + // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.) + emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0); + + // Do we need to record a call location for GC purposes? + // + if (!emitFullGCinfo) + { + emitRecordGCcall(dst, callInstrSize); + } + } + if (id->idIsCallRegPtr()) + { + callInstrSize = 1 << 2; + } + else + { + callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins. 
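+        // i.e. 8 bytes for the pcaddu18i+jirl reloc form, 16 bytes for the
+        // lu12i.w/ori/lu32i.d/jirl absolute form (see emitIns_Call).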
+ } + + return callInstrSize; +} + +//---------------------------------------------------------------------------------- +// LoongArch64 has an individual implementation for emitJumpDistBind(). +// +// Bind targets of relative jumps/branch to choose the smallest possible encoding. +// LoongArch64 has a small medium, and large encoding. +// +// Even though the small encoding is offset-18bits which lowest 2bits is always 0. +// The small encoding as the default is fit for most cases. +// + +void emitter::emitJumpDistBind() +{ +#ifdef DEBUG + if (emitComp->verbose) + { + printf("*************** In emitJumpDistBind()\n"); + } + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction list before jump distance binding:\n\n"); + emitDispIGlist(true); + } +#endif + + instrDescJmp* jmp; + + UNATIVE_OFFSET adjIG; + UNATIVE_OFFSET adjSJ; + insGroup* lstIG; +#ifdef DEBUG + insGroup* prologIG = emitPrologIG; +#endif // DEBUG + + // NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating the size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + +/*****************************************************************************/ +/* If the default small encoding is not enough, we start again here. */ +/*****************************************************************************/ + +AGAIN: + +#ifdef DEBUG + emitCheckIGoffsets(); +#endif + +#ifdef DEBUG + insGroup* lastIG = nullptr; + instrDescJmp* lastSJ = nullptr; +#endif + + lstIG = nullptr; + adjSJ = 0; + adjIG = 0; + + for (jmp = emitJumpList; jmp; jmp = jmp->idjNext) + { + insGroup* jmpIG; + insGroup* tgtIG; + + UNATIVE_OFFSET jsz; // size of the jump instruction in bytes + + NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset? + UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump + UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative + // offset of the jump + UNATIVE_OFFSET dstOffs; + NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded + +/* Make sure the jumps are properly ordered */ + +#ifdef DEBUG + assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ)); + lastSJ = (lastIG == jmp->idjIG) ? jmp : nullptr; + + assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || + emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow + lastIG = jmp->idjIG; +#endif // DEBUG + + /* Get hold of the current jump size */ + + jsz = jmp->idCodeSize(); + + /* Get the group the jump is in */ + + jmpIG = jmp->idjIG; + + /* Are we in a group different from the previous jump? */ + + if (lstIG != jmpIG) + { + /* Were there any jumps before this one? 
*/ + + if (lstIG) + { + /* Adjust the offsets of the intervening blocks */ + + do + { + lstIG = lstIG->igNext; + assert(lstIG); +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + lstIG->igOffs += adjIG; + assert(IsCodeAligned(lstIG->igOffs)); + } while (lstIG != jmpIG); + } + + /* We've got the first jump in a new group */ + adjSJ = 0; + lstIG = jmpIG; + } + + /* Apply any local size adjustment to the jump's relative offset */ + jmp->idjOffs += adjSJ; + + // If this is a jump via register, the instruction size does not change, so we are done. + CLANG_FORMAT_COMMENT_ANCHOR; + + /* Have we bound this jump's target already? */ + + if (jmp->idIsBound()) + { + /* Does the jump already have the smallest size? */ + + if (jmp->idjShort) + { + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + continue; + } + + tgtIG = jmp->idAddr()->iiaIGlabel; + } + else + { + /* First time we've seen this label, convert its target */ + CLANG_FORMAT_COMMENT_ANCHOR; + + tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + +#ifdef DEBUG + if (EMITVERBOSE) + { + if (tgtIG) + { + printf(" to %s\n", emitLabelString(tgtIG)); + } + else + { + printf("-- ERROR, no emitter cookie for " FMT_BB "; it is probably missing BBF_HAS_LABEL.\n", + jmp->idAddr()->iiaBBlabel->bbNum); + } + } + assert(tgtIG); +#endif // DEBUG + + /* Record the bound target */ + + jmp->idAddr()->iiaIGlabel = tgtIG; + jmp->idSetIsBound(); + } + + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + /* + In the following distance calculations, if we're not actually + scheduling the code (i.e. reordering instructions), we can + use the actual offset of the jump (rather than the beg/end of + the instruction group) since the jump will not be moved around + and thus its offset is accurate. + + First we need to figure out whether this jump is a forward or + backward one; to do this we simply look at the ordinals of the + group that contains the jump and the target. + */ + + srcInstrOffs = jmpIG->igOffs + jmp->idjOffs; + + /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */ + dstOffs = tgtIG->igOffs; + + srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch + + if (jmpIG->igNum < tgtIG->igNum) + { + /* Forward jump */ + + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. + */ + + dstOffs += adjIG; + + /* Compute the distance estimate */ + + jmpDist = dstOffs - srcEncodingOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist - psd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[1] Jump block is at %08X\n", jmpIG->igOffs); + printf("[1] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[1] Jump source is at %08X\n", srcEncodingOffs); + printf("[1] Label block is at %08X\n", dstOffs); + printf("[1] Jump dist. 
is %04X\n", jmpDist); + if (extra > 0) + { + printf("[1] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + else + { + /* Backward jump */ + + /* Compute the distance estimate */ + + jmpDist = srcEncodingOffs - dstOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist + nsd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[2] Jump block is at %08X\n", jmpIG->igOffs); + printf("[2] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[2] Jump source is at %08X\n", srcEncodingOffs); + printf("[2] Label block is at %08X\n", dstOffs); + printf("[2] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[2] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. 
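+                // beqz/bnez carry a 21-bit (word) displacement, so they reach farther than the
+                // 16-bit two-register compare branches handled above.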
+ { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + } // end for each jump + + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + // indicating the instrDescJmp's size of the type INS_OPTS_J had updated + // after the first round and should iterate again to update. + isLinkingEnd_LA = 0x2; + + // Adjust offsets of any remaining blocks. + for (; lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nLabels list after the jump dist binding:\n\n"); + emitDispIGlist(false); + } + + emitCheckIGoffsets(); +#endif // DEBUG +} + +/***************************************************************************** + * + * Emit a 32-bit LOONGARCH64 instruction + */ + +/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +{ + assert(sizeof(code_t) == 4); + BYTE* dstRW = dst + writeableOffset; + *((code_t*)dstRW) = code; + + return sizeof(code_t); +} + +/***************************************************************************** +* + * Append the machine code corresponding to the given instruction descriptor + * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' + * is the instruction group that contains the instruction. Updates '*dp' to + * point past the generated code, and returns the size of the instruction + * descriptor in bytes. + */ + +size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) +{ + BYTE* dst = *dp; + BYTE* dst2 = dst; // addr for updating gc info if needed. 
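+    // dst2 records the code address that GC liveness updates for this instruction are
+    // attributed to; for multi-instruction pseudo-ops it can point at an intermediate
+    // instruction rather than the end of the emitted group.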
+ code_t code = 0; + instruction ins; + size_t sz; // = emitSizeOfInsDsc(id); + +#ifdef DEBUG +#if DUMP_GC_TABLES + bool dspOffs = emitComp->opts.dspGCtbls; +#else + bool dspOffs = !emitComp->opts.disDiffable; +#endif +#endif // DEBUG + + assert(REG_NA == (int)REG_NA); + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_RELOC: + { + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ld_d reg, reg, off-lo-12bits + + regNumber reg1 = id->idReg1(); + + *(code_t*)dst = 0x1c000000 | (code_t)reg1; + + dst2 = dst; + dst += 4; + +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); + code = emitInsCode(INS_ld_d); + assert(code == 0x28c00000); +#endif + + if (id->idIsCnsReloc()) + { + ins = INS_addi_d; + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); + } + else + { + assert(id->idIsDspReloc()); + ins = INS_ld_d; + *(code_t*)dst = 0x28c00000 | (code_t)reg1 | (code_t)(reg1 << 5); + } + + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), reg1, dst); + } + else + { + emitGCregDeadUpd(reg1, dst); + } + + dst += 4; + + emitRecordRelocation(dst2, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); + + dst2 += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_I: + { + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + regNumber reg1 = id->idReg1(); + dst2 += 4; + + switch (id->idCodeSize()) + { + case 8: + { + if (id->idReg2()) + { // special for INT64_MAX or UINT32_MAX; + code = emitInsCode(INS_addi_d); + code |= (code_t)reg1; + code |= (code_t)REG_R0; + code |= 0xfff << 10; + + *(code_t*)dst = code; + dst += 4; + + ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; + code = emitInsCode(INS_srli_d); + code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); + *(code_t*)dst = code; + } + else + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + } + break; + } + case 12: + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 32) & 0xfffff) << 5; + + *(code_t*)dst = code; + + break; + } + case 16: + { + code = emitInsCode(INS_lu12i_w); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= (code_t)((imm >> 32) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu52i_d); + code |= (code_t)reg1; + code |= (code_t)(reg1) << 5; + code |= ((code_t)(imm >> 52) & 0xfff) << 10; + + *(code_t*)dst = code; + + break; + } + default: + unreached(); + break; + } + + ins = INS_ori; + dst += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_RC: + { + // Reference to JIT data + + // when id->idIns == bl, for reloc! + // pcaddu12i r21, off-hi-20bits + // addi_d reg, r21, off-lo-12bits + // when id->idIns == load-ins + // pcaddu12i r21, off-hi-20bits + // load reg, offs_lo-12bits(r21) #when ins is load ins. + // + // when id->idIns == bl + // lu12i_w r21, addr-hi-20bits + // ori reg, r21, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // when id->idIns == load-ins + // lu12i_w r21, offs_hi-20bits + // lu32i_d r21, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(r21) + assert(id->idAddr()->iiaIsJitDataOffset()); + assert(id->idGCref() == GCT_NONE); + + int doff = id->idAddr()->iiaGetJitDataOffset(); + assert(doff >= 0); + + ssize_t imm = emitGetInsSC(id); + assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0. + + unsigned dataOffs = (unsigned)(doff + imm); + + assert(dataOffs < emitDataSize()); + + ins = id->idIns(); + regNumber reg1 = id->idReg1(); + + if (id->idIsReloc()) + { + // get the addr-offset of the data. + imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs; + assert(imm > 0); + assert(!(imm & 3)); + + doff = (int)(imm & 0x800); + imm += doff; + assert(isValidSimm20(imm >> 12)); + + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. 
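+
+                // The 0x800 round-up compensates for the sign-extension of the lo-12 immediate:
+                // when bit 11 of the offset is set, the hi-20 part is bumped by one and the lo-12
+                // part becomes negative, so pcaddu12i plus the following addi.d/load still sum to
+                // the original offset.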
+ +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); +#endif + code = 0x1c000000 | 21; + *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7); + dst += 4; + + if (ins == INS_bl) + { + assert(isGeneralRegister(reg1)); + ins = INS_addi_d; +#ifdef DEBUG + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); +#endif + code = 0x02c00000 | (21 << 5); + *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); + } + else + { + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; // NOTE:here must be REG_R21 !!! + code |= (code_t)(doff & 0xfff) << 10; + *(code_t*)dst = code; + } + dst += 4; + dst2 = dst; + } + else + { + // get the addr of the data. + imm = (ssize_t)emitConsBlock + dataOffs; + + code = emitInsCode(INS_lu12i_w); + if (ins == INS_bl) + { + assert((imm >> 32) == 0xff); + + doff = (int)imm >> 12; + code |= (code_t)REG_R21; + code |= ((code_t)doff & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + + ins = INS_lu32i_d; + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + } + else + { + doff = (int)(imm & 0x800); + imm += doff; + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. + + assert((imm >> 32) == 0xff); + + dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. + code |= (code_t)REG_R21; + code |= ((code_t)dataOffs & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_lu32i_d); + code |= (code_t)REG_R21; + code |= 0xff << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; + code |= (code_t)(doff & 0xfff) << 10; + + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + } + } + + sz = sizeof(instrDesc); + } + break; + + case INS_OPTS_RL: + { + // if for reloc! + // pcaddu12i reg, offset-hi20 + // addi_d reg, reg, offset-lo12 + // + // else: // TODO-LoongArch64:optimize. + // lu12i_w reg, dst-hi-12bits + // ori reg, reg, dst-lo-12bits + // lu32i_d reg, dst-hi-32bits + + insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); + id->idAddr()->iiaIGlabel = tgtIG; + + regNumber reg1 = id->idReg1(); + assert(isGeneralRegister(reg1)); + + if (id->idIsReloc()) + { + ssize_t imm = (ssize_t)tgtIG->igOffs; + imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; + assert((imm & 3) == 0); + + int doff = (int)(imm & 0x800); + imm += doff; + assert(isValidSimm20(imm >> 12)); + + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. 
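+
+                // Same hi-20/lo-12 split (with the 0x800 round-up for the sign-extended lo-12)
+                // as the INS_OPTS_RC reloc case above.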
+ + code = 0x1c000000; + *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7); + dst += 4; + dst2 = dst; +#ifdef DEBUG + code = emitInsCode(INS_pcaddu12i); + assert(code == 0x1c000000); + code = emitInsCode(INS_addi_d); + assert(code == 0x02c00000); +#endif + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10); + ins = INS_addi_d; + } + else + { + ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; + assert((imm >> 32) == 0xff); + + code = emitInsCode(INS_lu12i_w); + code |= (code_t)REG_R21; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; + *(code_t*)dst = code; + dst += 4; + dst2 = dst; + + ins = INS_lu32i_d; + code = emitInsCode(INS_lu32i_d); + code |= (code_t)reg1; + code |= 0xff << 5; + + *(code_t*)dst = code; + } + + dst += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_JIRL: + // case_1: <----------from INS_OPTS_J: + // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next + // bnez/beqz dst | b dst | b dst + //_next: + // + // case_2: <---------- TODO-LoongArch64: from INS_OPTS_J: + // bnez/beqz _next: + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + //_next: + // + // case_3: <----------INS_OPTS_JIRL: //not used by now !!! + // b dst + // + // case_4: <----------INS_OPTS_JIRL: //not used by now !!! + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + // + { + instrDescJmp* jmp = (instrDescJmp*)id; + + regNumber reg1 = id->idReg1(); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + imm -= 4; + + assert((imm & 0x3) == 0); + + ins = jmp->idIns(); + assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! + switch (jmp->idCodeSize()) + { + case 8: + { + regNumber reg2 = id->idReg2(); + assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); + + if ((INS_beq == ins) || (INS_bne == ins)) + { + if ((-0x400000 <= imm) && (imm < 0x400000)) + { + code = emitInsCode(INS_xor); + code |= (code_t)REG_R21; + code |= (code_t)reg1 << 5; + code |= (code_t)reg2 << 10; + + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins == INS_beq ? 
INS_beqz : INS_bnez); + code |= (code_t)REG_R21 << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + + *(code_t*)dst = code; + dst += 4; + } + else + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bne & 0xfffe) == INS_beq); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + } + else if ((INS_bceqz == ins) || (INS_bcnez == ins)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bcnez & 0xfffe) == INS_bceqz); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)reg1) << 5; + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + else if ((INS_blt <= ins) && (ins <= INS_bgeu)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bge & 0xfffe) == INS_blt); + assert((INS_bgeu & 0xfffe) == INS_bltu); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + + *(code_t*)dst = code; + dst += 4; + } + break; + } + + default: + unreached(); + break; + } + } + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J_cond: + // b_cond dst-relative. + // + // NOTE: + // the case "imm > 0x7fff" not supported. + // More info within the emitter::emitIns_J_cond_la(); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); + assert(!(imm & 3)); + + ins = id->idIns(); + code = emitInsCode(ins); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J: + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. 
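+
+            // The displacement is encoded in 4-byte units (imm >> 2): bits 25..10 hold offs[15:0];
+            // b/bl additionally place offs[25:16] in bits 9..0, while beqz/bnez/bceqz/bcnez place
+            // offs[20:16] in bits 4..0.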
+ assert((imm & 3) == 0); + + ins = id->idIns(); + code = emitInsCode(ins); + if (ins == INS_b || ins == INS_bl) + { + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + } + else if (ins == INS_bnez || ins == INS_beqz) + { + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + } + else if (ins == INS_bcnez || ins == INS_bceqz) + { + assert((code_t)(id->idReg1()) < 8); // cc + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } + break; + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + sz = sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + sz = sizeof(instrDesc); + } + dst += emitOutputCall(ig, dst, id, 0); + ins = INS_nop; + break; + + // case INS_OPTS_NONE: + default: + *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); + dst += 4; + dst2 = dst; + ins = id->idIns(); + sz = emitSizeOfInsDsc(id); + break; + } + + // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. + // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a + // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as + // for stores, but we ignore those cases here.) + if (emitInsMayWriteToGCReg(ins)) // True if "id->idIns()" writes to a register than can hold GC ref. + { + // We assume that "idReg1" is the primary destination register for all instructions + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2); + } + else + { + emitGCregDeadUpd(id->idReg1(), dst2); + } + } + + // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC + // ref or overwritten one. + if (emitInsWritesToLclVarStackLoc(id) /*|| emitInsWritesToLclVarStackLocPair(id)*/) + { + int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); + unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); + bool FPbased; + int adr = emitComp->lvaFrameAddress(varNum, &FPbased); + if (id->idGCref() != GCT_NONE) + { + emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. + vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); + } + // if (emitInsWritesToLclVarStackLocPair(id)) + //{ + // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; + // if (id->idGCrefReg2() != GCT_NONE) + // { + // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp); + // } + // else + // { + // // If the type of the local is a gc ref type, update the liveness. + // var_types vt; + // if (varNum >= 0) + // { + // // "Regular" (non-spill-temp) local. 
+ // vt = var_types(emitComp->lvaTable[varNum].lvType); + // } + // else + // { + // TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + // vt = tmpDsc->tdTempType(); + // } + // if (vt == TYP_REF || vt == TYP_BYREF) + // emitGCvarDeadUpd(adr + ofs2, *dp); + // } + //} + } + +#ifdef DEBUG + /* Make sure we set the instruction descriptor size correctly */ + + // size_t expected = emitSizeOfInsDsc(id); + // assert(sz == expected); + + if (emitComp->opts.disAsm || emitComp->verbose) + { + code_t* cp = (code_t*)*dp; + while ((BYTE*)cp != dst) + { + emitDisInsName(*cp, (BYTE*)cp, id); + cp++; + } + } + + if (emitComp->compDebugBreak) + { + // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for + // emitting instruction a6, (i.e. IN00a6 in jitdump). + if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != dst); + + *dp = dst; + + return sz; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +#ifdef DEBUG + +// clang-format off +static const char* const RegNames[] = +{ + #define REGDEF(name, rnum, mask, sname) sname, + #include "register.h" +}; +// clang-format on + +//---------------------------------------------------------------------------------------- +// Disassemble the given instruction. +// The `emitter::emitDisInsName` is focused on the most important for debugging. +// So it implemented as far as simply and independently which is very useful for +// porting easily to the release mode. +// +// Arguments: +// code - The instruction's encoding. +// addr - The address of the code. +// id - The instrDesc of the code if needed. +// +// Note: +// The length of the instruction's name include aligned space is 13. 
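+//  Unrecognized encodings are reported as "LOONGARCH illegal instruction: <code>".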
+// + +void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) +{ + const BYTE* insAdr = addr; + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; + + unsigned int opcode = (code >> 26) & 0x3f; + + bool disOpcode = !emitComp->opts.disDiffable; + bool disAddr = emitComp->opts.disAddr; + if (disAddr) + { + printf(" 0x%llx", insAdr); + } + + printf(" "); + + if (disOpcode) + { + printf("%08X ", code); + } + + // bits: 31-26,MSB6 + switch (opcode) + { + case 0x0: + { + goto Label_OPCODE_0; + } + case 0x2: + { + goto Label_OPCODE_2; + } + case 0x3: + { + goto Label_OPCODE_3; + } + case 0xe: + { + goto Label_OPCODE_E; + } + case LA_2RI16_ADDU16I_D: // 0x4 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; + printf("addu16i.d %s, %s, %d\n", rd, rj, si16); + return; + } + case 0x5: + case 0x6: + case 0x7: + { + // bits: 31-25,MSB7 + unsigned int inscode = (code >> 25) & 0x7f; + const char* rd = RegNames[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; + switch (inscode) + { + case LA_1RI20_LU12I_W: + printf("lu12i.w %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_LU32I_D: + printf("lu32i.d %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDI: + printf("pcaddi %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCALAU12I: + printf("pcalau12i %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDU12I: + printf("pcaddu12i %s, 0x%x\n", rd, si20); + return; + case LA_1RI20_PCADDU18I: + { + printf("pcaddu18i %s, 0x%x\n", rd, si20); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x8: + case 0x9: + { + // bits: 31-24,MSB8 + unsigned int inscode = (code >> 24) & 0xff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff) << 2; + si14 >>= 2; + switch (inscode) + { + case LA_2RI14_LL_W: + printf("ll.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_SC_W: + printf("sc.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LL_D: + printf("ll.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_SC_D: + printf("sc.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LDPTR_W: + printf("ldptr.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_STPTR_W: + printf("stptr.w %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_LDPTR_D: + printf("ldptr.d %s, %s, %d\n", rd, rj, si14); + return; + case LA_2RI14_STPTR_D: + printf("stptr.d %s, %s, %d\n", rd, rj, si14); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0xa: + { + // bits: 31-24,MSB8 + unsigned int inscode = (code >> 22) & 0x3ff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + switch (inscode) + { + case LA_2RI12_LD_B: + printf("ld.b %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_H: + printf("ld.h %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_W: + printf("ld.w %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_D: + printf("ld.d %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_B: + printf("st.b %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_H: + printf("st.h %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_ST_W: + printf("st.w %s, %s, %d\n", rd, rj, 
si12); + return; + case LA_2RI12_ST_D: + printf("st.d %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_BU: + printf("ld.bu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_HU: + printf("ld.hu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_LD_WU: + printf("ld.wu %s, %s, %d\n", rd, rj, si12); + return; + case LA_2RI12_PRELD: + NYI_LOONGARCH64("unused instr LA_2RI12_PRELD"); + return; + case LA_2RI12_FLD_S: + printf("fld.s %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FST_S: + printf("fst.s %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FLD_D: + printf("fld.d %s, %s, %d\n", fd, rj, si12); + return; + case LA_2RI12_FST_D: + printf("fst.d %s, %s, %d\n", fd, rj, si12); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_1RI21_BEQZ: // 0x10 + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + printf("beqz %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); + return; + } + case LA_1RI21_BNEZ: // 0x11 + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + printf("bnez %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); + return; + } + case 0x12: + { + // LA_1RI21_BCEQZ + // LA_1RI21_BCNEZ + const char* cj = CFregName[(code >> 5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + if (0 == ((code >> 8) & 0x3)) + { + printf("bceqz %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); + return; + } + else if (1 == ((code >> 8) & 0x3)) + { + printf("bcnez %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2RI16_JIRL: // 0x13 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + if (id->idDebugOnlyInfo()->idMemCookie) + { + assert(0 < id->idDebugOnlyInfo()->idMemCookie); + const char* methodName; + methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); + printf("jirl %s, %s, %d #%s\n", rd, rj, offs16, methodName); + } + else + { + printf("jirl %s, %s, %d\n", rd, rj, offs16); + } + return; + } + case LA_I26_B: // 0x14 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; + offs26 >>= 4; + printf("b 0x%llx\n", (int64_t)insAdr + offs26); + return; + } + case LA_I26_BL: // 0x15 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; + offs26 >>= 4; + printf("bl 0x%llx\n", (int64_t)insAdr + offs26); + return; + } + case LA_2RI16_BEQ: // 0x16 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("beq %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BNE: // 0x17 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bne %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BLT: // 0x18 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("blt %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + 
return; + } + case LA_2RI16_BGE: // 0x19 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bge %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BLTU: // 0x1a + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bltu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + case LA_2RI16_BGEU: // 0x1b + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf("bgeu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_0: + opcode = (code >> 22) & 0x3ff; + + // bits: 31-22,MSB10 + switch (opcode) + { + case 0x0: + { + // bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x0: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + switch (inscode2) + { + case 0x0: + { + // bits:31-10,MSB22 + unsigned int inscode3 = (code >> 10) & 0x3fffff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + switch (inscode3) + { + case LA_2R_CLO_W: + printf("clo.w %s, %s\n", rd, rj); + return; + case LA_2R_CLZ_W: + printf("clz.w %s, %s\n", rd, rj); + return; + case LA_2R_CTO_W: + printf("cto.w %s, %s\n", rd, rj); + return; + case LA_2R_CTZ_W: + printf("ctz.w %s, %s\n", rd, rj); + return; + case LA_2R_CLO_D: + printf("clo.d %s, %s\n", rd, rj); + return; + case LA_2R_CLZ_D: + printf("clz.d %s, %s\n", rd, rj); + return; + case LA_2R_CTO_D: + printf("cto.d %s, %s\n", rd, rj); + return; + case LA_2R_CTZ_D: + printf("ctz.d %s, %s\n", rd, rj); + return; + case LA_2R_REVB_2H: + printf("revb.2h %s, %s\n", rd, rj); + return; + case LA_2R_REVB_4H: + printf("revb.4h %s, %s\n", rd, rj); + return; + case LA_2R_REVB_2W: + printf("revb.2w %s, %s\n", rd, rj); + return; + case LA_2R_REVB_D: + printf("revb.d %s, %s\n", rd, rj); + return; + case LA_2R_REVH_2W: + printf("revh.2w %s, %s\n", rd, rj); + return; + case LA_2R_REVH_D: + printf("revh.d %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_4B: + printf("bitrev.4b %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_8B: + printf("bitrev.8b %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_W: + printf("bitrev.w %s, %s\n", rd, rj); + return; + case LA_2R_BITREV_D: + printf("bitrev.d %s, %s\n", rd, rj); + return; + case LA_2R_EXT_W_H: + printf("ext.w.h %s, %s\n", rd, rj); + return; + case LA_2R_EXT_W_B: + printf("ext.w.b %s, %s\n", rd, rj); + return; + case LA_2R_RDTIMEL_W: + printf("rdtimel.w %s, %s\n", rd, rj); + return; + case LA_2R_RDTIMEH_W: + printf("rdtimeh.w %s, %s\n", rd, rj); + return; + case LA_2R_RDTIME_D: + printf("rdtime.d %s, %s\n", rd, rj); + return; + case LA_2R_CPUCFG: + printf("cpucfg %s, %s\n", rd, rj); + return; + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2R_ASRTLE_D: + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("asrtle.d %s, %s\n", rj, rk); + return; + } + case LA_2R_ASRTGT_D: + { + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("asrtgt.d %s, %s\n", rj, rk); + return; + } 
+ default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x1: + { + // LA_OP_ALSL_W + // LA_OP_ALSL_WU + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + if (0 == ((code >> 17) & 0x1)) + { + printf("alsl.w %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + else if (1 == ((code >> 17) & 0x1)) + { + printf("alsl.wu %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_OP_BYTEPICK_W: // 0x2 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf("bytepick.w %s, %s, %s, %d\n", rd, rj, rk, sa2); + return; + } + case LA_OP_BYTEPICK_D: // 0x3 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa3 = (code >> 15) & 0x7; + printf("bytepick.d %s, %s, %s, %d\n", rd, rj, rk, sa3); + return; + } + case 0x4: + case 0x5: + case 0x6: + case 0x7: + case 0x8: + case 0x9: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + + switch (inscode2) + { + case LA_3R_ADD_W: + printf("add.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ADD_D: + printf("add.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SUB_W: + printf("sub.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SUB_D: + printf("sub.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLT: + printf("slt %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLTU: + printf("sltu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MASKEQZ: + printf("maskeqz %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MASKNEZ: + printf("masknez %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_NOR: + printf("nor %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_AND: + printf("and %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_OR: + printf("or %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_XOR: + printf("xor %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ORN: + printf("orn %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ANDN: + printf("andn %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLL_W: + printf("sll.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRL_W: + printf("srl.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRA_W: + printf("sra.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SLL_D: + printf("sll.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRL_D: + printf("srl.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_SRA_D: + printf("sra.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ROTR_W: + printf("rotr.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_ROTR_D: + printf("rotr.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MUL_W: + printf("mul.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_W: + printf("mulh.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_WU: + printf("mulh.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MUL_D: + printf("mul.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULH_D: + printf("mulh.d %s, %s, %s\n", rd, rj, rk); + return; + case 
LA_3R_MULH_DU: + printf("mulh.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULW_D_W: + printf("mulw.d.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MULW_D_WU: + printf("mulw.d.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_W: + printf("div.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_W: + printf("mod.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_WU: + printf("div.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_WU: + printf("mod.wu %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_D: + printf("div.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_D: + printf("mod.d %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_DIV_DU: + printf("div.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_MOD_DU: + printf("mod.du %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_B_W: + printf("crc.w.b.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_H_W: + printf("crc.w.h.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_W_W: + printf("crc.w.w.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRC_W_D_W: + printf("crc.w.d.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_B_W: + printf("crcc.w.b.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_H_W: + printf("crcc.w.h.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_W_W: + printf("crcc.w.w.w %s, %s, %s\n", rd, rj, rk); + return; + case LA_3R_CRCC_W_D_W: + printf("crcc.w.d.w %s, %s, %s\n", rd, rj, rk); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case 0xa: + { + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + unsigned int codefield = code & 0x7fff; + switch (inscode2) + { + case LA_OP_BREAK: + printf("break 0x%x\n", codefield); + return; + case LA_OP_DBGCALL: + printf("dbgcall 0x%x\n", codefield); + return; + case LA_OP_SYSCALL: + printf("syscall 0x%x\n", codefield); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case LA_OP_ALSL_D: // 0xb + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf("alsl.d %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x1: + { + if (code & 0x200000) + { + // LA_OP_BSTRINS_W + // LA_OP_BSTRPICK_W + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbw = (code >> 10) & 0x1f; + unsigned int msbw = (code >> 16) & 0x1f; + if (!(code & 0x8000)) + { + printf("bstrins.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); + return; + } + else if (code & 0x8000) + { + printf("bstrpick.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + else + { + // bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x10: + { + // LA_OP_SLLI_W: + // LA_OP_SLLI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("slli.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("slli.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal 
instruction: %08X\n", code); + return; + } + return; + } + case 0x11: + { + // LA_OP_SRLI_W: + // LA_OP_SRLI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("srli.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("srli.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x12: + { + // LA_OP_SRAI_W: + // LA_OP_SRAI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("srai.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("srai.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case 0x13: + { + // LA_OP_ROTRI_W: + // LA_OP_ROTRI_D: + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; + printf("rotri.w %s, %s, %d\n", rd, rj, ui5); + return; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; + printf("rotri.d %s, %s, %d\n", rd, rj, ui6); + return; + } + else + { + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + return; + } + case LA_OP_BSTRINS_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf("bstrins.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); + return; + } + case LA_OP_BSTRPICK_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf("bstrpick.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); + return; + } + case 0x4: + { + // bits: 31-15,MSB17 + unsigned int inscode1 = (code >> 15) & 0x1ffff; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + + switch (inscode1) + { + case LA_3R_FADD_S: + printf("fadd.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FADD_D: + printf("fadd.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSUB_S: + printf("fsub.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSUB_D: + printf("fsub.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMUL_S: + printf("fmul.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMUL_D: + printf("fmul.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FDIV_S: + printf("fdiv.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FDIV_D: + printf("fdiv.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAX_S: + printf("fmax.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAX_D: + printf("fmax.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMIN_S: + printf("fmin.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMIN_D: + printf("fmin.d %s, 
%s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAXA_S: + printf("fmaxa.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMAXA_D: + printf("fmaxa.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMINA_S: + printf("fmina.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FMINA_D: + printf("fmina.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSCALEB_S: + printf("fscaleb.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FSCALEB_D: + printf("fscaleb.d %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_S: + printf("fcopysign.s %s, %s, %s\n", fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_D: + printf("fcopysign.d %s, %s, %s\n", fd, fj, fk); + return; + case 0x228: + case 0x229: + case 0x232: + case 0x234: + case 0x235: + case 0x236: + case 0x23a: + case 0x23c: + { + // bits:31-10,MSB22 + unsigned int inscode2 = (code >> 10) & 0x3fffff; + switch (inscode2) + { + case LA_2R_FABS_S: + printf("fabs.s %s, %s\n", fd, fj); + return; + case LA_2R_FABS_D: + printf("fabs.d %s, %s\n", fd, fj); + return; + case LA_2R_FNEG_S: + printf("fneg.s %s, %s\n", fd, fj); + return; + case LA_2R_FNEG_D: + printf("fneg.d %s, %s\n", fd, fj); + return; + case LA_2R_FLOGB_S: + printf("flogb.s %s, %s\n", fd, fj); + return; + case LA_2R_FLOGB_D: + printf("flogb.d %s, %s\n", fd, fj); + return; + case LA_2R_FCLASS_S: + printf("fclass.s %s, %s\n", fd, fj); + return; + case LA_2R_FCLASS_D: + printf("fclass.d %s, %s\n", fd, fj); + return; + case LA_2R_FSQRT_S: + printf("fsqrt.s %s, %s\n", fd, fj); + return; + case LA_2R_FSQRT_D: + printf("fsqrt.d %s, %s\n", fd, fj); + return; + case LA_2R_FRECIP_S: + printf("frecip.s %s, %s\n", fd, fj); + return; + case LA_2R_FRECIP_D: + printf("frecip.d %s, %s\n", fd, fj); + return; + case LA_2R_FRSQRT_S: + printf("frsqrt.s %s, %s\n", fd, fj); + return; + case LA_2R_FRSQRT_D: + printf("frsqrt.d %s, %s\n", fd, fj); + return; + case LA_2R_FMOV_S: + printf("fmov.s %s, %s\n", fd, fj); + return; + case LA_2R_FMOV_D: + printf("fmov.d %s, %s\n", fd, fj); + return; + case LA_2R_MOVGR2FR_W: + printf("movgr2fr.w %s, %s\n", fd, rj); + return; + case LA_2R_MOVGR2FR_D: + printf("movgr2fr.d %s, %s\n", fd, rj); + return; + case LA_2R_MOVGR2FRH_W: + printf("movgr2frh.w %s, %s\n", fd, rj); + return; + case LA_2R_MOVFR2GR_S: + printf("movfr2gr.s %s, %s\n", rd, fj); + return; + case LA_2R_MOVFR2GR_D: + printf("movfr2gr.d %s, %s\n", rd, fj); + return; + case LA_2R_MOVFRH2GR_S: + printf("movfrh2gr.s %s, %s\n", rd, fj); + return; + case LA_2R_MOVGR2FCSR: + NYI_LOONGARCH64("unused instr LA_2R_MOVGR2FCSR"); + return; + case LA_2R_MOVFCSR2GR: + NYI_LOONGARCH64("unused instr LA_2R_MOVFCSR2GR"); + return; + case LA_2R_MOVFR2CF: + { + const char* cd = CFregName[code & 0x7]; + printf("movfr2cf %s, %s\n", cd, fj); + return; + } + case LA_2R_MOVCF2FR: + { + const char* cj = CFregName[(code >> 5) & 0x7]; + printf("movcf2fr %s, %s\n", fd, cj); + return; + } + case LA_2R_MOVGR2CF: + { + const char* cd = CFregName[code & 0x7]; + printf("movgr2cf %s, %s\n", cd, rj); + return; + } + case LA_2R_MOVCF2GR: + { + const char* cj = CFregName[(code >> 5) & 0x7]; + printf("movcf2gr %s, %s\n", rd, cj); + return; + } + case LA_2R_FCVT_S_D: + printf("fcvt.s.d %s, %s\n", fd, fj); + return; + case LA_2R_FCVT_D_S: + printf("fcvt.d.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_W_S: + printf("ftintrm.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_W_D: + printf("ftintrm.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_L_S: + printf("ftintrm.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRM_L_D: 
+ printf("ftintrm.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_W_S: + printf("ftintrp.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_W_D: + printf("ftintrp.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_L_S: + printf("ftintrp.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRP_L_D: + printf("ftintrp.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_W_S: + printf("ftintrz.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_W_D: + printf("ftintrz.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_L_S: + printf("ftintrz.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRZ_L_D: + printf("ftintrz.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_W_S: + printf("ftintrne.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_W_D: + printf("ftintrne.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_L_S: + printf("ftintrne.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINTRNE_L_D: + printf("ftintrne.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_W_S: + printf("ftint.w.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_W_D: + printf("ftint.w.d %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_L_S: + printf("ftint.l.s %s, %s\n", fd, fj); + return; + case LA_2R_FTINT_L_D: + printf("ftint.l.d %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_S_W: + printf("ffint.s.w %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_S_L: + printf("ffint.s.l %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_D_W: + printf("ffint.d.w %s, %s\n", fd, fj); + return; + case LA_2R_FFINT_D_L: + printf("ffint.d.l %s, %s\n", fd, fj); + return; + case LA_2R_FRINT_S: + printf("frint.s %s, %s\n", fd, fj); + return; + case LA_2R_FRINT_D: + printf("frint.d %s, %s\n", fd, fj); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + return; + } + case LA_2RI12_SLTI: // 0x8 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("slti %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_SLTUI: // 0x9 + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("sltui %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_ADDI_W: // 0xa + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("addi.w %s, %s, %d\n", rd, rj, si12); + return; + } + case LA_2RI12_ADDI_D: // 0xb + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; + si12 >>= 4; + printf("addi.d %s, %s, %ld\n", rd, rj, si12); + return; + } + case LA_2RI12_LU52I_D: // 0xc + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int si12 = (code >> 10) & 0xfff; + printf("lu52i.d %s, %s, 0x%x\n", rd, rj, si12); + return; + } + case LA_2RI12_ANDI: // 0xd + { + if (code == 0x03400000) + { + printf("nop\n"); + } + else + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("andi %s, %s, 0x%x\n", rd, rj, ui12); + } + return; + } + case LA_2RI12_ORI: // 0xe + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; 
+ unsigned int ui12 = ((code >> 10) & 0xfff); + printf("ori %s, %s, 0x%x\n", rd, rj, ui12); + return; + } + case LA_2RI12_XORI: // 0xf + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("xori %s, %s, 0x%x\n", rd, rj, ui12); + return; + } + + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +// Label_OPCODE_1: +// opcode = (code >> 24) & 0xff; +// //bits: 31-24,MSB8 + +Label_OPCODE_2: + opcode = (code >> 20) & 0xfff; + + // bits: 31-20,MSB12 + switch (opcode) + { + case LA_4R_FMADD_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMADD_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; + printf("fnmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_3: + opcode = (code >> 20) & 0xfff; + + // bits: 31-20,MSB12 + switch (opcode) + { + case LA_OP_FCMP_cond_S: + { + // bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char* cd = CFregName[code & 0x7]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + switch 
(cond) + { + case 0x0: + printf("fcmp.caf.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x1: + printf("fcmp.saf.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x2: + printf("fcmp.clt.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x3: + printf("fcmp.slt.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x4: + printf("fcmp.ceq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x5: + printf("fcmp.seq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x6: + printf("fcmp.cle.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x7: + printf("fcmp.sle.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x8: + printf("fcmp.cun.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x9: + printf("fcmp.sun.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xA: + printf("fcmp.cult.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xB: + printf("fcmp.sult.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xC: + printf("fcmp.cueq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xD: + printf("fcmp.sueq.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xE: + printf("fcmp.cule.s %s, %s, %s\n", cd, fj, fk); + return; + case 0xF: + printf("fcmp.sule.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x10: + printf("fcmp.cne.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x11: + printf("fcmp.sne.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x14: + printf("fcmp.cor.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x15: + printf("fcmp.sor.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x18: + printf("fcmp.cune.s %s, %s, %s\n", cd, fj, fk); + return; + case 0x19: + printf("fcmp.sune.s %s, %s, %s\n", cd, fj, fk); + return; + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + } + case LA_OP_FCMP_cond_D: + { + // bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char* cd = CFregName[code & 0x7]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + switch (cond) + { + case 0x0: + printf("fcmp.caf.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x1: + printf("fcmp.saf.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x2: + printf("fcmp.clt.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x3: + printf("fcmp.slt.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x4: + printf("fcmp.ceq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x5: + printf("fcmp.seq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x6: + printf("fcmp.cle.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x7: + printf("fcmp.sle.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x8: + printf("fcmp.cun.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x9: + printf("fcmp.sun.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xA: + printf("fcmp.cult.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xB: + printf("fcmp.sult.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xC: + printf("fcmp.cueq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xD: + printf("fcmp.sueq.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xE: + printf("fcmp.cule.d %s, %s, %s\n", cd, fj, fk); + return; + case 0xF: + printf("fcmp.sule.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x10: + printf("fcmp.cne.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x11: + printf("fcmp.sne.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x14: + printf("fcmp.cor.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x15: + printf("fcmp.sor.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x18: + printf("fcmp.cune.d %s, %s, %s\n", cd, fj, fk); + return; + case 0x19: + printf("fcmp.sune.d %s, %s, %s\n", cd, fj, fk); + return; + default: + printf("LOONGARCH 
illegal instruction: %08X\n", code); + return; + } + } + case LA_4R_FSEL: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* ca = CFregName[(code >> 15) & 0x7]; + printf("fsel %s, %s, %s, %s\n", fd, fj, fk, ca); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } + +Label_OPCODE_E: + opcode = (code >> 15) & 0x1ffff; + + // bits: 31-15,MSB17 + switch (opcode) + { + case LA_3R_LDX_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stx.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_BU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.bu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_HU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.hu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDX_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldx.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_PRELDX: + NYI_LOONGARCH64("unused instr LA_3R_PRELDX"); + return; + case LA_3R_FLDX_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldx.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDX_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldx.d %s, %s, %s\n", fd, rj, rk); + return; + } + case 
LA_3R_FSTX_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstx.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTX_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstx.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_AMSWAP_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = 
RegNames[(code >> 10) & 0x1f]; + printf("ammin.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amswap_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amadd_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amand_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amor_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("amxor_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.w %s, %s, %s\n", rd, rj, 
rk); + return; + } + case LA_3R_AMMAX_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammax_db.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_WU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.wu %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_DU: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ammin_db.du %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_OP_DBAR: + { + unsigned int hint = code & 0x7fff; + printf("dbar 0x%x\n", hint); + return; + } + case LA_OP_IBAR: + { + unsigned int hint = code & 0x7fff; + printf("ibar 0x%x\n", hint); + return; + } + case LA_3R_FLDGT_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldgt.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDGT_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldgt.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDLE_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldle.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FLDLE_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fldle.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTGT_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstgt.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTGT_D: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstgt.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTLE_S: + { + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstle.s %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_FSTLE_D: + { + const char* fd = 
RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("fstle.d %s, %s, %s\n", fd, rj, rk); + return; + } + case LA_3R_LDGT_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDGT_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldgt.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_LDLE_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("ldle.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.w %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STGT_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stgt.d %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_B: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.b %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_H: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.h %s, %s, %s\n", rd, rj, rk); + return; + } + case LA_3R_STLE_W: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.w %s, %s, %s\n", rd, rj, rk); + 
return; + } + case LA_3R_STLE_D: + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; + printf("stle.d %s, %s, %s\n", rd, rj, rk); + return; + } + default: + printf("LOONGARCH illegal instruction: %08X\n", code); + return; + } +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) + { + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + assert(sz == 0); + printf(" "); + } + } +} + +void emitter::emitDispIns( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ + // LA implements this similar by `emitter::emitDisInsName`. + // For LA maybe the `emitDispIns` is over complicate. + // The `emitter::emitDisInsName` is focused on the most important for debugging. + NYI_LOONGARCH64("LA not used the emitter::emitDispIns"); +} + +/***************************************************************************** + * + * Display a stack frame reference. + */ + +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ + printf("["); + + if (varx < 0) + printf("TEMP_%02u", -varx); + else + emitComp->gtDispLclVar(+varx, false); + + if (disp < 0) + printf("-0x%02x", -disp); + else if (disp > 0) + printf("+0x%02x", +disp); + + printf("]"); + + if (varx >= 0 && emitComp->opts.varNames) + { + LclVarDsc* varDsc; + const char* varName; + + assert((unsigned)varx < emitComp->lvaCount); + varDsc = emitComp->lvaTable + varx; + varName = emitComp->compLocalVarName(varx, offs); + + if (varName) + { + printf("'%s", varName); + + if (disp < 0) + printf("-%d", -disp); + else if (disp > 0) + printf("+%d", +disp); + + printf("'"); + } + } +} + +#endif // DEBUG + +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// Since LOONGARCH64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for LOONGARCH64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + emitAttr addType = varTypeIsGC(memBase) ? 
EA_BYREF : EA_PTRSIZE; + + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); + + if (offset != 0) + { + regNumber tmpReg = indir->GetSingleTempReg(); + + if (isValidSimm12(offset)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, + offset); // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, REG_R21); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + else // (offset == 0) + { + // Then load/store dataReg from/to [memBase + index] + switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) + { + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } + else + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } + else + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; + else + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); + } + + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_I(INS_slli_d, emitActualTypeSize(index->TypeGet()), REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); + } + } + } + else // no Index register + { + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. 
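+            // For example (sketch): a class/static variable address generally does not fit in a
+            // 12-bit displacement, so the full address is first materialized in the reserved temp
+            // register and the access then goes through it, conceptually:
+            //   <form &field in addrReg>
+            //   ld.d  dataReg, addrReg, 0    // the exact sequence is chosen by emitIns_R_C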
+ regNumber addrReg = indir->GetSingleTempReg(); + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } + } + else if (isValidSimm12(offset)) + { + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); + + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } +#endif // DEBUG + // Then load/store dataReg from/to [addrReg] + emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); + } +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. + +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + NYI_LOONGARCH64("emitInsBinary-----unused"); + return REG_R0; +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. 
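+    // For example (sketch): GT_ADD(x, 8) is emitted as addi.d/addi.w with imm = 8; since ADD is
+    // commutative, GT_ADD(8, x) is accepted as well. For GT_SUB only src2 may be a contained
+    // constant, and it is folded below by negating the immediate and switching to addi.d/addi.w.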
+ GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + bool needCheckOv = dst->gtOverflowEx(); + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + if (needCheckOv) + { + if (ins == INS_add_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_add_w) // || ins == INS_add + { + assert(attr == EA_4BYTE); + } + else if (ins == INS_addi_d) + { + assert(intConst != nullptr); + } + else if (ins == INS_addi_w) + { + assert(intConst != nullptr); + } + else if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + } + else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) + { + assert(attr == EA_8BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || + (ins == INS_mulw_d_wu)) + { + assert(attr == EA_4BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. 
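+            // (sketch) The multiply overflow check is therefore done on registers below: the high
+            // half of the product is recomputed into REG_R21 with mulh.{w,wu,d,du}; an unsigned
+            // multiply overflows when that high half is non-zero, a signed one when it differs
+            // from the sign bits of the low result (srai.w/srai.d into REG_RA, then bne against REG_R21).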
+ assert(intConst == nullptr); + } + else + { +#ifdef DEBUG + printf("LOONGARCH64-Invalid ins for overflow check: %s\n", codeGen->genInsName(ins)); +#endif + assert(!"Invalid ins for overflow check"); + } + } + + if (intConst != nullptr) + { + ssize_t imm = intConst->IconValue(); + if (ins == INS_andi || ins == INS_ori || ins == INS_xori) + { + assert(isValidUimm12(imm)); + } + else + { + assert(isValidSimm12(imm)); + } + + if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + assert(imm != -2048); + ins = INS_addi_d; + imm = -imm; + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + assert(imm != -2048); + ins = INS_addi_w; + imm = -imm; + } + + assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); + + if (needCheckOv) + { + emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); + } + + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), imm); + + if (needCheckOv) + { + if (ins == INS_addi_d || ins == INS_addi_w) + { + // A = B + C + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, dst->GetRegNum(), nullptr, REG_R21); + } + else + { + if (imm > 0) + { + // B > 0 and C > 0, if A < B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R0, REG_R21); + emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_R21, dst->GetRegNum(), imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + + codeGen->genDefineTempLabel(tmpLabel); + } + else if (imm < 0) + { + // B < 0 and C < 0, if A > B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R21, REG_R0); + emitIns_R_R_I(INS_addi_d, attr, REG_R21, REG_R0, imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, REG_R21, nullptr, dst->GetRegNum()); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + else if (varTypeIsFloating(dst)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else if (dst->OperGet() == GT_MUL) + { + if (!needCheckOv && !(dst->gtFlags & GTF_UNSIGNED)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else + { + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + instruction ins2; + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + ins2 = INS_mulh_wu; + else + ins2 = INS_mulh_du; + } + else + { + if (attr == EA_8BYTE) + ins2 = INS_mulh_d; + else + ins2 = INS_mulh_w; + } + + emitIns_R_R_R(ins2, attr, REG_R21, src1->GetRegNum(), src2->GetRegNum()); + } + + // n * n bytes will store n bytes result + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); + } + + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + else + { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); + size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 
63 : 31; + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), + imm); + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); + } + } + } + } + else if (dst->OperIs(GT_AND, GT_AND_NOT, GT_OR, GT_XOR)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + // TODO-LOONGARCH64-CQ: here sign-extend dst when deal with 32bit data is too conservative. + if (EA_SIZE(attr) == EA_4BYTE) + emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); + } + else + { + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); + regNumber saveOperReg1 = REG_NA; + regNumber saveOperReg2 = REG_NA; + + if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) + { + if (src1->gtType == TYP_INT) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); + regOp1 = REG_RA; // dst->ExtractTempReg(); + } + if (src2->gtType == TYP_INT) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); + regOp2 = REG_R21; // dst->ExtractTempReg(); + } + } + if (needCheckOv) + { + assert(!varTypeIsFloating(dst)); + + assert(REG_R21 != dst->GetRegNum()); + assert(REG_RA != dst->GetRegNum()); + + if (dst->GetRegNum() == regOp1) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + saveOperReg1 = REG_R21; + saveOperReg2 = regOp2; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0); + } + else if (dst->GetRegNum() == regOp2) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + saveOperReg1 = regOp1; + saveOperReg2 = REG_R21; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp2, REG_R0); + } + else + { + saveOperReg1 = regOp1; + saveOperReg2 = regOp2; + } + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), regOp1, regOp2); + + if (needCheckOv) + { + if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) + { + ssize_t imm; + regNumber tempReg1; + regNumber tempReg2; + // ADD : A = B + C + // SUB : C = A - B + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + // if A < B, goto overflow + if (dst->OperGet() == GT_ADD) + { + tempReg1 = dst->GetRegNum(); + tempReg2 = saveOperReg1; + } + else + { + tempReg1 = saveOperReg1; + tempReg2 = saveOperReg2; + } + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2); + } + else + { + tempReg1 = REG_RA; + tempReg2 = dst->GetSingleTempReg(); + assert(tempReg1 != tempReg2); + assert(tempReg1 != saveOperReg1); + assert(tempReg2 != saveOperReg2); + + ssize_t ui6 = (attr == EA_4BYTE) ? 
31 : 63; + if (dst->OperGet() == GT_ADD) + emitIns_R_R_I(INS_srli_d, attr, tempReg1, saveOperReg1, ui6); + else + emitIns_R_R_I(INS_srli_d, attr, tempReg1, dst->GetRegNum(), ui6); + emitIns_R_R_I(INS_srli_d, attr, tempReg2, saveOperReg2, ui6); + + emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2); + if (attr == EA_4BYTE) + { + imm = 1; + emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm); + emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); + } + // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); + + emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0); + + emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); + + // B > 0 and C > 0, if A < B, goto overflow + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, + dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + + codeGen->genDefineTempLabel(tmpLabel2); + + codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW); + + codeGen->genDefineTempLabel(tmpLabel3); + + // B < 0 and C < 0, if A > B, goto overflow + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, + dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + else + { +#ifdef DEBUG + printf("---------[LOONGARCH64]-NOTE: UnsignedOverflow instruction %d\n", ins); +#endif + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + + return dst->GetRegNum(); +} + +unsigned emitter::get_curTotalCodeSize() +{ + return emitTotalCodeSize; +} + +#if defined(DEBUG) || defined(LATE_DISASM) + +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are NOT accurate and just a function feature. +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + + // TODO-LoongArch64: support this function. + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + + return result; +} + +#endif // defined(DEBUG) || defined(LATE_DISASM) + +#ifdef DEBUG +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// TODO-LoongArch64: supporting SIMD. +// Arguments: +// reg - A general-purpose register orfloating-point register. +// size - unused parameter. +// varName - unused parameter. +// +// Return value: +// A string that represents a general-purpose register name or floating-point scalar register name. 
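+// For example, the same string is returned regardless of 'size': unlike x86 (eax vs. rax),
+// LOONGARCH64 register names do not vary with operand width, so 'size' and 'varName' are
+// ignored and the name comes straight from the RegNames table.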
+// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) +{ + assert(reg < REG_COUNT); + + const char* rn = nullptr; + + rn = RegNames[reg]; + assert(rn != nullptr); + + return rn; +} +#endif + +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a give instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_fmov_s: + case INS_fmov_d: + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movfr2gr_s: + case INS_movfr2gr_d: + { + return true; + } + + default: + { + return false; + } + } +} +#endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h new file mode 100644 index 0000000000000..d7e7cc5450acb --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.h @@ -0,0 +1,241 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if defined(TARGET_LOONGARCH64) + +// The LOONGARCH64 instructions are all 32 bits in size. +// we use an unsigned int to hold the encoded instructions. +// This typedef defines the type that we use to hold encoded instructions. +// +typedef unsigned int code_t; + +/************************************************************************/ +/* Routines that compute the size of / encode instructions */ +/************************************************************************/ + +struct CnsVal +{ + ssize_t cnsVal; + bool cnsReloc; +}; + +#ifdef DEBUG + +/************************************************************************/ +/* Debug-only routines to display instructions */ +/************************************************************************/ + +const char* emitFPregName(unsigned reg, bool varName = true); +const char* emitVectorRegName(regNumber reg); + +void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id); +#endif // DEBUG + +void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); +void emitIns_I_la(emitAttr attr, regNumber reg, ssize_t imm); + +/************************************************************************/ +/* Private members that deal with target-dependent instr. 
descriptors */ +/************************************************************************/ + +private: +instrDesc* emitNewInstrCallDir(int argCnt, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +/************************************************************************/ +/* Private helpers for instruction output */ +/************************************************************************/ + +private: +bool emitInsIsLoad(instruction ins); +bool emitInsIsStore(instruction ins); +bool emitInsIsLoadOrStore(instruction ins); + +emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); + +// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] +void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + +// Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer +unsigned emitOutput_Instr(BYTE* dst, code_t code); + +// Method to do check if mov is redundant with respect to the last instruction. +// If yes, the caller of this method can choose to omit current mov instruction. +static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. + +/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -(((int)1) << 11) <= value && value < (((int)1) << 11); +}; + +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. +static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +} + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +} + +// Returns true if 'value' is a legal signed immediate 20 bit encoding. +static bool isValidSimm20(ssize_t value) +{ + return -(((int)1) << 19) <= value && value < (((int)1) << 19); +}; + +// Returns true if 'value' is a legal signed immediate 38 bit encoding. +static bool isValidSimm38(ssize_t value) +{ + return -(((ssize_t)1) << 37) <= value && value < (((ssize_t)1) << 37); +}; + +// Returns the number of bits used by the given 'size'. 
+inline static unsigned getBitWidth(emitAttr size) +{ + assert(size <= EA_8BYTE); + return (unsigned)size * BITS_PER_BYTE; +} + +inline static bool isGeneralRegister(regNumber reg) +{ + return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +inline static bool isGeneralRegisterOrR0(regNumber reg) +{ + return (reg >= REG_FIRST) && (reg <= REG_INT_LAST); +} // Includes REG_R0 + +inline static bool isFloatReg(regNumber reg) +{ + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + +/************************************************************************/ +/* The public entry points to output instructions */ +/************************************************************************/ + +public: +void emitIns(instruction ins); + +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); + +void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); +void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); + +void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) +{ + emitIns_R_R(ins, attr, reg1, reg2); +} + +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +// Checks for a large immediate that needs a second instruction +void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm); + +void emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + +void emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + +void emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +void emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); + +enum EmitCallType +{ + + // I have included here, but commented out, all the values used by the x86 emitter. + // However, LOONGARCH has a much reduced instruction set, and so the LOONGARCH emitter only + // supports a subset of the x86 variants. By leaving them commented out, it becomes + // a compile time error if code tries to use them (and hopefully see this comment + // and know why they are unavailible on LOONGARCH), while making it easier to stay + // in-sync with x86 and possibly add them back in if needed. 
+ + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method + // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method + // EC_FUNC_ADDR, // Direct call to an absolute address + + // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) + EC_INDIR_R, // Indirect call via register + // EC_INDIR_SR, // Indirect call via stack-reference (local var) + // EC_INDIR_C, // Indirect call via static class var + // EC_INDIR_ARD, // Indirect call via an addressing mode + + EC_COUNT +}; + +void emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di, + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); + +unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); + +unsigned get_curTotalCodeSize(); // bytes of code + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 4982104acc749..02ab3bb879d6f 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -139,7 +139,7 @@ static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADE /* Interface for generating unwind information */ /************************************************************************/ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL); @@ -151,7 +151,7 @@ void emitSplit(emitLocation* startLoc, void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) #if defined(TARGET_ARM) diff --git a/src/coreclr/jit/error.h b/src/coreclr/jit/error.h index 450c24de3a456..618e5b3a7ee2d 100644 --- a/src/coreclr/jit/error.h +++ b/src/coreclr/jit/error.h @@ -174,6 +174,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_X86) @@ -181,6 +182,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) NYIRAW("NYI_X86: " msg) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM) @@ -188,6 +190,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM64) @@ -195,10 +198,18 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) +#define NYI_LOONGARCH64(msg) do { } while (0) + +#elif defined(TARGET_LOONGARCH64) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) 
NYIRAW("NYI_LOONGARCH64: " msg) #else -#error "Unknown platform, not x86, ARM, or AMD64?" +#error "Unknown platform, not x86, ARM, LOONGARCH64 or AMD64?" #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d111ae7ed3f4f..319100e943b4a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3277,6 +3277,27 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostSz += 4; } } +#elif defined(TARGET_LOONGARCH64) + if (base) + { + *pCostEx += base->GetCostEx(); + *pCostSz += base->GetCostSz(); + } + + if (idx) + { + *pCostEx += idx->GetCostEx(); + *pCostSz += idx->GetCostSz(); + } + if (cns != 0) + { + if (!emitter::isValidSimm12(cns)) + { + // TODO-LoongArch64-CQ: tune for LoongArch64. + *pCostEx += 1; + *pCostSz += 4; + } + } #else #error "Unknown TARGET" #endif @@ -3691,13 +3712,24 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } goto COMMON_CNS; +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + case GT_CNS_STR: + costEx = IND_COST_EX + 2; + costSz = 4; + goto COMMON_CNS; + + case GT_CNS_LNG: + case GT_CNS_INT: + costEx = 1; + costSz = 4; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: #error "Unknown TARGET" #endif - COMMON_CNS: /* Note that some code below depends on constants always getting @@ -3753,6 +3785,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX; costSz = 4; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + costEx = 2; + costSz = 8; #else #error "Unknown TARGET" #endif @@ -3926,6 +3962,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX * 2; costSz = 6; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. + costEx = 1; + costSz = 4; #else #error "Unknown TARGET" #endif @@ -6965,7 +7005,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) { // All checks pass we can perform the division operation using a reciprocal multiply. @@ -13823,7 +13863,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); - // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); @@ -21851,6 +21890,44 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, m_regType[i] = comp->getJitGCType(gcPtrs[i]); } +#elif defined(TARGET_LOONGARCH64) + assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); + + uint32_t floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); + + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; + } + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + } + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + { + comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + else + { + for (unsigned i = 0; i < 2; ++i) + { + m_regType[i] = comp->getJitGCType(gcPtrs[i]); + } + } + #elif defined(TARGET_X86) // an 8-byte struct returned using two registers @@ -22041,6 +22118,27 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3 } +#elif defined(TARGET_LOONGARCH64) + var_types regType = GetReturnRegType(idx); + if (idx == 0) + { + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // A0 or F0 + } + else + { + noway_assert(idx == 1); // Up to 2 return registers for two-float-field structs + + // If the first return register is from the same register file, return the one next to it. + if (varTypeIsIntegralOrI(regType)) + { + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 + } + else // varTypeUsesFloatReg(regType) + { + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } + } + #endif // TARGET_XXX assert(resultReg != REG_NA); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 0c2367dbb2012..5552e6ecc77a6 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4439,6 +4439,10 @@ struct GenTreeCall final : public GenTree bool HasMultiRegRetVal() const { #ifdef FEATURE_MULTIREG_RET +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else + #if defined(TARGET_X86) || defined(TARGET_ARM) if (varTypeIsLong(gtType)) { @@ -4452,6 +4456,8 @@ struct GenTreeCall final : public GenTree } // Now it is a struct that is returned in registers. 
return GetReturnTypeDesc()->IsMultiRegRetType(); +#endif + #else // !FEATURE_MULTIREG_RET return false; #endif // !FEATURE_MULTIREG_RET diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 54cedb357958b..4ab1844b66d4d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8541,7 +8541,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -8571,7 +8571,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARMARCH +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 return false; } @@ -10380,7 +10380,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#elif FEATURE_MULTIREG_RET && defined(TARGET_ARM64) +#elif FEATURE_MULTIREG_RET && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // Is method returning a multi-reg struct? if (IsMultiRegReturnedType(retClsHnd, unmgdCallConv)) @@ -10419,7 +10419,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#endif // FEATURE_MULTIREG_RET && TARGET_ARM64 +#endif // FEATURE_MULTIREG_RET && (TARGET_ARM64 || TARGET_LOONGARCH64) if (!op->IsCall() || !op->AsCall()->TreatAsHasRetBufArg(this)) { @@ -14135,6 +14135,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; + impBashVarAddrsToI(op1); // Casts from floating point types must not have GTF_UNSIGNED set. @@ -17443,7 +17444,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) } } else -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ReturnTypeDesc retTypeDesc; retTypeDesc.InitializeStructReturnType(this, retClsHnd, info.compCallConv); unsigned retRegCount = retTypeDesc.GetReturnRegCount(); @@ -20811,6 +20812,9 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) default: return false; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64: add some instrinsics. + return false; #else // TODO: This portion of logic is not implemented for other arch. // The reason for returning true is that on all other arch the only intrinsic diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 34d55023a3ce5..67ae437f03b75 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -66,6 +66,10 @@ const char* CodeGen::genInsName(instruction ins) #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm, #include "instrs.h" +#elif defined(TARGET_LOONGARCH64) + #define INST(id, nm, ldst, e1) nm, + #include "instrs.h" + #else #error "Unknown TARGET" #endif @@ -420,7 +424,12 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s size = emitActualTypeSize(type); } +#ifdef TARGET_LOONGARCH64 + // inst_RV is not used for LoongArch64, so there is no need to define `emitIns_R`. 
+ NYI_LOONGARCH64("inst_RV-----unused on LOONGARCH64----"); +#else GetEmitter()->emitIns_R(ins, size, reg); +#endif } /***************************************************************************** @@ -434,6 +443,31 @@ void CodeGen::inst_Mov(var_types dstType, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */) { +#ifdef TARGET_LOONGARCH64 + if (isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) + { + if (dstType == TYP_FLOAT) + { + dstType = TYP_INT; + } + else if (dstType == TYP_DOUBLE) + { + dstType = TYP_LONG; + } + else if (dstType == TYP_INT) + { + dstType = TYP_FLOAT; + } + else if (dstType == TYP_LONG) + { + dstType = TYP_DOUBLE; + } + else + { + NYI_LOONGARCH64("CodeGen::inst_Mov dstType"); + } + } +#endif instruction ins = ins_Copy(srcReg, dstType); if (size == EA_UNKNOWN) @@ -523,7 +557,7 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else NYI("inst_RV_RV_RV"); @@ -599,6 +633,8 @@ void CodeGen::inst_RV_IV( assert(ins != INS_tst); assert(ins != INS_mov); GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); #else // !TARGET_ARM #ifdef TARGET_AMD64 // Instead of an 8-byte immediate load, a 4-byte immediate will do fine @@ -1221,6 +1257,8 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { + NYI_LOONGARCH64("ins_Move_Extend"); + instruction ins = INS_invalid; if (varTypeIsSIMD(srcType)) @@ -1426,6 +1464,19 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_ldr; #elif defined(TARGET_ARM) return INS_vldr; +#elif defined(TARGET_LOONGARCH64) + if (srcType == TYP_DOUBLE) + { + return INS_fld_d; + } + else if (srcType == TYP_FLOAT) + { + return INS_fld_s; + } + else + { + assert(!"unhandled floating type"); + } #else assert(!varTypeIsFloating(srcType)); #endif @@ -1464,6 +1515,29 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* else ins = INS_ldrsh; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_bu; + else + ins = INS_ld_b; + } + else if (varTypeIsShort(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_hu; + else + ins = INS_ld_h; + } + else if (TYP_INT == srcType) + { + ins = INS_ld_w; + } + else + { + ins = INS_ld_d; // default ld_d. + } #else NYI("ins_Load"); #endif @@ -1515,6 +1589,15 @@ instruction CodeGen::ins_Copy(var_types dstType) { return INS_mov; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(dstType)) + { + return dstType == TYP_FLOAT ? INS_fmov_s : INS_fmov_d; + } + else + { + return INS_mov; + } #else // TARGET_* #error "Unknown TARGET_" #endif @@ -1566,6 +1649,19 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) assert(dstType == TYP_INT); return INS_vmov_f2i; } +#elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: supporting SIMD. + assert(!varTypeIsSIMD(dstType)); + if (dstIsFloatReg) + { + assert(!genIsValidFloatReg(srcReg)); + return dstType == TYP_FLOAT ? INS_movgr2fr_w : INS_movgr2fr_d; + } + else + { + assert(genIsValidFloatReg(srcReg)); + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? 
INS_movfr2gr_s : INS_movfr2gr_d; + } #else // TARGET* #error "Unknown TARGET" #endif @@ -1578,6 +1674,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) * Parameters * dstType - destination type * aligned - whether destination is properly aligned if dstType is a SIMD type + * - for LoongArch64 aligned is used for store-index. */ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/) { @@ -1632,6 +1729,19 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { return INS_vstr; } +#elif defined(TARGET_LOONGARCH64) + assert(!varTypeIsSIMD(dstType)); + if (varTypeIsFloating(dstType)) + { + if (dstType == TYP_DOUBLE) + { + return aligned ? INS_fstx_d : INS_fst_d; + } + else if (dstType == TYP_FLOAT) + { + return aligned ? INS_fstx_s : INS_fst_s; + } + } #else assert(!varTypeIsSIMD(dstType)); assert(!varTypeIsFloating(dstType)); @@ -1646,6 +1756,15 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strb; else if (varTypeIsShort(dstType)) ins = INS_strh; +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(dstType)) + ins = aligned ? INS_stx_b : INS_st_b; + else if (varTypeIsShort(dstType)) + ins = aligned ? INS_stx_h : INS_st_h; + else if (TYP_INT == dstType) + ins = aligned ? INS_stx_w : INS_st_w; + else + ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); #endif @@ -1923,6 +2042,8 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla GetEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags)); #elif defined(TARGET_ARM64) GetEmitter()->emitIns_Mov(INS_mov, size, reg, REG_ZR, /* canSkip */ true); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(INS_ori, size, reg, REG_R0, 0); #else #error "Unknown TARGET" #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index d694df71be479..a01492d08b8a9 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -6,7 +6,11 @@ #define _INSTR_H_ /*****************************************************************************/ +#ifdef TARGET_LOONGARCH64 +#define BAD_CODE 0XFFFFFFFF +#else #define BAD_CODE 0x0BADC0DE // better not match a real encoding! +#endif /*****************************************************************************/ @@ -47,6 +51,11 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals +#elif defined(TARGET_LOONGARCH64) + #define INST(id, nm, ldst, e1) INS_##id, + #include "instrs.h" + + INS_lea, // Not a real instruction. It is used for load the address of stack locals #else #error Unsupported target architecture #endif @@ -140,7 +149,7 @@ enum insFlags : uint32_t INS_FLAGS_DONT_CARE = 0x00, }; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // TODO-Cleanup: Move 'insFlags' under TARGET_ARM enum insFlags: unsigned { @@ -292,6 +301,33 @@ enum insBarrier : unsigned INS_BARRIER_ST = 14, INS_BARRIER_SY = 15, }; +#elif defined(TARGET_LOONGARCH64) +enum insOpts : unsigned +{ + INS_OPTS_NONE, + + INS_OPTS_RC, // see ::emitIns_R_C(). + INS_OPTS_RL, // see ::emitIns_R_L(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_J, // see ::emitIns_J(). + INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitIns_I_la(). + INS_OPTS_C, // see ::emitIns_Call(). + INS_OPTS_RELOC, // see ::emitIns_R_AI(). 
+}; + +enum insBarrier : unsigned +{ + // TODO-LOONGARCH64-CQ: All of these are the same value right now. + // The distinct encodings are reserved for future extension; + // LoongArch64 doesn't support them right now. + INS_BARRIER_FULL = 0, + INS_BARRIER_WMB = INS_BARRIER_FULL,//4, + INS_BARRIER_MB = INS_BARRIER_FULL,//16, + INS_BARRIER_ACQ = INS_BARRIER_FULL,//17, + INS_BARRIER_REL = INS_BARRIER_FULL,//18, + INS_BARRIER_RMB = INS_BARRIER_FULL,//19, +}; #endif #undef EA_UNKNOWN diff --git a/src/coreclr/jit/instrs.h b/src/coreclr/jit/instrs.h index b543f781645f5..aa16547f44be7 100644 --- a/src/coreclr/jit/instrs.h +++ b/src/coreclr/jit/instrs.h @@ -7,6 +7,8 @@ #include "instrsarm.h" #elif defined(TARGET_ARM64) #include "instrsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "instrsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h new file mode 100644 index 0000000000000..ada87672e397a --- /dev/null +++ b/src/coreclr/jit/instrsloongarch64.h @@ -0,0 +1,488 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/***************************************************************************** + * LoongArch64 instructions for JIT compiler + * + * id -- the enum name for the instruction + * nm -- textual name (for assembly display) + * ld/st/cmp -- load/store/compare instruction + * encode -- encoding 1 + * +******************************************************************************/ + +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifndef INST +#error INST must be defined before including this file. +#endif + +/*****************************************************************************/ +/* The following is LOONGARCH64-specific */ +/*****************************************************************************/ + +// If you're adding a new instruction: +// You need not only to fill in one of these macros describing the instruction, but also: +// * If the instruction writes to more than one destination register, update the function +// emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. + +// clang-format off +INST(invalid, "INVALID", 0, BAD_CODE) +INST(nop , "nop", 0, 0x03400000) + + // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. +INST(bceqz, "bceqz", 0, 0x48000000) +INST(bcnez, "bcnez", 0, 0x48000100) + +INST(beq, "beq", 0, 0x58000000) +INST(bne, "bne", 0, 0x5c000000) + +INST(blt, "blt", 0, 0x60000000) +INST(bge, "bge", 0, 0x64000000) +INST(bltu, "bltu", 0, 0x68000000) +INST(bgeu, "bgeu", 0, 0x6c000000) + +////R_I. +INST(beqz, "beqz", 0, 0x40000000) +INST(bnez, "bnez", 0, 0x44000000) + +////I. +INST(b, "b", 0, 0x50000000) +INST(bl, "bl", 0, 0x54000000) + +/////////////////////////////////////////////////////////////////////////////////////////// +////NOTE: Begin +//// the following instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +// enum name FP LD/ST FMT ENCODE +// +////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). +/////////////////////////////////////////////////////////////////////////////////////////// +// mov rd,rj +// In fact, mov is an alias instruction, "ori rd,rj,0" +INST(mov, "mov", 0, 0x03800000) + //dneg is an alias instruction. + //sub_d rd, zero, rk +INST(dneg, "dneg", 0, 0x00118000) + //neg is an alias instruction.
+ //sub_w rd, zero, rk +INST(neg, "neg", 0, 0x00110000) + //not is a alias instruction. + //nor rd, rj, zero +INST(not, "not", 0, 0x00140000) + +// enum:id name FP LD/ST Formate ENCODE +////R_R_R. +INST(add_w, "add.w", 0, 0x00100000) +INST(add_d, "add.d", 0, 0x00108000) +INST(sub_w, "sub.w", 0, 0x00110000) +INST(sub_d, "sub.d", 0, 0x00118000) + +INST(and, "and", 0, 0x00148000) +INST(or, "or", 0, 0x00150000) +INST(nor, "nor", 0, 0x00140000) +INST(xor, "xor", 0, 0x00158000) +INST(andn, "andn", 0, 0x00168000) +INST(orn, "orn", 0, 0x00160000) + +INST(mul_w, "mul.w", 0, 0x001c0000) +INST(mul_d, "mul.d", 0, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0x001f8000) +INST(div_w, "div.w", 0, 0x00200000) +INST(div_wu, "div.wu", 0, 0x00210000) +INST(div_d, "div.d", 0, 0x00220000) +INST(div_du, "div.du", 0, 0x00230000) +INST(mod_w, "mod.w", 0, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0x00218000) +INST(mod_d, "mod.d", 0, 0x00228000) +INST(mod_du, "mod.du", 0, 0x00238000) + +INST(sll_w, "sll.w", 0, 0x00170000) +INST(srl_w, "srl.w", 0, 0x00178000) +INST(sra_w, "sra.w", 0, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0x001b0000) +INST(sll_d, "sll.d", 0, 0x00188000) +INST(srl_d, "srl.d", 0, 0x00190000) +INST(sra_d, "sra.d", 0, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0x00130000) +INST(masknez, "masknez", 0, 0x00138000) + +INST(slt, "slt", 0, 0x00120000) +INST(sltu, "sltu", 0, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0x386a8000) +INST(amand_w, "amand.w", 0, 0x38620000) +INST(amand_d, "amand.d", 0, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0x386b8000) +INST(amor_w, "amor.w", 0, 0x38630000) +INST(amor_d, "amor.d", 0, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0x00270000) 
+INST(crcc_w_d_w, "crcc.w.d.w", 0, 0x00278000) + +////R_R_R_I. +INST(alsl_w, "alsl.w", 0, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0x002c0000) + +INST(bytepick_w, "bytepick.w", 0, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0x000c0000) + +INST(fsel, "fsel", 0, 0x0d000000) + +////R_I. +INST(lu12i_w, "lu12i.w", 0, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0x16000000) + +INST(pcaddi, "pcaddi", 0, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0x1e000000) + +////R_R. +INST(ext_w_b, "ext.w.b", 0, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0x00005800) +INST(clo_w, "clo.w", 0, 0x00001000) +INST(clz_w, "clz.w", 0, 0x00001400) +INST(cto_w, "cto.w", 0, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0x00001c00) +INST(clo_d, "clo.d", 0, 0x00002000) +INST(clz_d, "clz.d", 0, 0x00002400) +INST(cto_d, "cto.d", 0, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0x00003800) +INST(revb_d, "revb.d", 0, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0x00004000) +INST(revh_d, "revh.d", 0, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0x00006c00) + +////R_R_I_I. +INST(bstrins_w, "bstrins.w", 0, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0x00c00000) + +////Load. +INST(ld_b, "ld.b", LD, 0x28000000) +INST(ld_h, "ld.h", LD, 0x28400000) +INST(ld_w, "ld.w", LD, 0x28800000) +INST(ld_d, "ld.d", LD, 0x28c00000) +INST(ld_bu, "ld.bu", LD, 0x2a000000) +INST(ld_hu, "ld.hu", LD, 0x2a400000) +INST(ld_wu, "ld.wu", LD, 0x2a800000) + +INST(ldptr_w, "ldptr.w", LD, 0x24000000) +INST(ldptr_d, "ldptr.d", LD, 0x26000000) +INST(ll_w, "ll.w", 0, 0x20000000) +INST(ll_d, "ll.d", 0, 0x22000000) + +INST(ldx_b, "ldx.b", LD, 0x38000000) +INST(ldx_h, "ldx.h", LD, 0x38040000) +INST(ldx_w, "ldx.w", LD, 0x38080000) +INST(ldx_d, "ldx.d", LD, 0x380c0000) +INST(ldx_bu, "ldx.bu", LD, 0x38200000) +INST(ldx_hu, "ldx.hu", LD, 0x38240000) +INST(ldx_wu, "ldx.wu", LD, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0x387b8000) + +////R_R_I. 
+INST(addi_w, "addi.w", 0, 0x02800000) +INST(addi_d, "addi.d", 0, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0x03000000) +INST(slti, "slti", 0, 0x02000000) + +INST(sltui, "sltui", 0, 0x02400000) +INST(andi, "andi", 0, 0x03400000) +INST(ori, "ori", 0, 0x03800000) +INST(xori, "xori", 0, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0x00408000) +INST(srli_w, "srli.w", 0, 0x00448000) +INST(srai_w, "srai.w", 0, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0x004c8000) +INST(slli_d, "slli.d", 0, 0x00410000) +INST(srli_d, "srli.d", 0, 0x00450000) +INST(srai_d, "srai.d", 0, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0x10000000) + +INST(jirl, "jirl", 0, 0x4c000000) +//////////////////////////////////////////////////////////////////////////////////////////// +////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). +// +////NOTE: End +//// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////////////////////////////////////////////////// + +////Store. +INST(st_b, "st.b", ST, 0x29000000) +INST(st_h, "st.h", ST, 0x29400000) +INST(st_w, "st.w", ST, 0x29800000) +INST(st_d, "st.d", ST, 0x29c00000) + +INST(stptr_w, "stptr.w", ST, 0x25000000) +INST(stptr_d, "stptr.d", ST, 0x27000000) +INST(sc_w, "sc.w", 0, 0x21000000) +INST(sc_d, "sc.d", 0, 0x23000000) + +INST(stx_b, "stx.b", ST, 0x38100000) +INST(stx_h, "stx.h", ST, 0x38140000) +INST(stx_w, "stx.w", ST, 0x38180000) +INST(stx_d, "stx.d", ST, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0x387d8000) +INST(stle_b, "stle.b", 0, 0x387e0000) +INST(stle_h, "stle.h", 0, 0x387e8000) +INST(stle_w, "stle.w", 0, 0x387f0000) +INST(stle_d, "stle.d", 0, 0x387f8000) + +INST(dbar, "dbar", 0, 0x38720000) +INST(ibar, "ibar", 0, 0x38728000) + +INST(syscall, "syscall", 0, 0x002b0000) +INST(break, "break", 0, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0x00018000) + +INST(preld, "preld", LD, 0x2ac00000) +INST(preldx, "preldx", LD, 0x382c0000) + +////Float instructions. +////R_R_R. +INST(fadd_s, "fadd.s", 0, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0x01130000) + +INST(fldx_s, "fldx.s", LD, 0x38300000) +INST(fldx_d, "fldx.d", LD, 0x38340000) +INST(fstx_s, "fstx.s", ST, 0x38380000) +INST(fstx_d, "fstx.d", ST, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0x38778000) + +////R_R_R_R. 
+INST(fmadd_s, "fmadd.s", 0, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0x08e00000) + +////R_R. +INST(fabs_s, "fabs.s", 0, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0x011ae800) +INST(frint_s, "frint.s", 0, 0x011e4400) +INST(frint_d, "frint.d", 0, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00) + +//// +INST(movgr2fcsr, "movgr2fcsr", 0, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0x0114dc00) + +////R_R_I. 
+INST(fcmp_caf_s, "fcmp.caf.s", 0, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0x0c1c8000) + +////R_R_I. 
+INST(fld_s, "fld.s", LD, 0x2b000000) +INST(fld_d, "fld.d", LD, 0x2b800000) +INST(fst_s, "fst.s", ST, 0x2b400000) +INST(fst_d, "fst.d", ST, 0x2bc00000) + +// clang-format on +/*****************************************************************************/ +#undef INST +/*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 1a1af89490d62..46945ed7eae7f 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -42,6 +42,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_X86 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_X86 and HOST_LOONGARCH64 +#endif #elif defined(HOST_AMD64) #if defined(HOST_X86) #error Cannot define both HOST_AMD64 and HOST_X86 @@ -52,6 +55,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_AMD64 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_AMD64 and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM) #if defined(HOST_X86) #error Cannot define both HOST_ARM and HOST_X86 @@ -62,6 +68,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_ARM and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM64) #if defined(HOST_X86) #error Cannot define both HOST_ARM64 and HOST_X86 @@ -72,6 +81,22 @@ #if defined(HOST_ARM) #error Cannot define both HOST_ARM64 and HOST_ARM #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM64 and HOST_LOONGARCH64 +#endif +#elif defined(HOST_LOONGARCH64) +#if defined(HOST_X86) +#error Cannot define both HOST_LOONGARCH64 and HOST_X86 +#endif +#if defined(HOST_AMD64) +#error Cannot define both HOST_LOONGARCH64 and HOST_AMD64 +#endif +#if defined(HOST_ARM) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM +#endif +#if defined(HOST_ARM64) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM64 +#endif #else #error Unsupported or unset host architecture #endif @@ -86,6 +111,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_X86 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_X86 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_AMD64) #if defined(TARGET_X86) #error Cannot define both TARGET_AMD64 and TARGET_X86 @@ -96,6 +124,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_AMD64 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_AMD64 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM and TARGET_X86 @@ -106,6 +137,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_ARM and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM64) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM64 and TARGET_X86 @@ -116,6 +150,22 @@ #if defined(TARGET_ARM) #error Cannot define both TARGET_ARM64 and TARGET_ARM #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM64 and TARGET_LOONGARCH64 +#endif +#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_X86) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_X86 +#endif +#if defined(TARGET_AMD64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_AMD64 +#endif +#if defined(TARGET_ARM) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM +#endif +#if defined(TARGET_ARM64) +#error Cannot 
define both TARGET_LOONGARCH64 and TARGET_ARM64 +#endif #else #error Unsupported or unset target architecture #endif @@ -163,6 +213,8 @@ #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 +#elif defined(TARGET_LOONGARCH64) +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0x6264 #else #error Unsupported or unset target architecture #endif @@ -207,6 +259,14 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +#define UNIX_LOONGARCH64_ONLY_ARG(x) , x +#define UNIX_LOONGARCH64_ONLY(x) x +#else // !TARGET_LOONGARCH64 +#define UNIX_LOONGARCH64_ONLY_ARG(x) +#define UNIX_LOONGARCH64_ONLY(x) +#endif // TARGET_LOONGARCH64 + #if defined(DEBUG) #define DEBUG_ARG_SLOTS #endif @@ -224,7 +284,7 @@ #define DEBUG_ARG_SLOTS_ASSERT(x) #endif -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define FEATURE_PUT_STRUCT_ARG_STK 1 #endif @@ -236,7 +296,7 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define MULTIREG_HAS_SECOND_GC_RET 1 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index f92329e46f053..f3bef3cf01475 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -557,6 +557,11 @@ CONFIG_STRING(JitFunctionFile, W("JitFunctionFile")) // of the frame) CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0) #endif // defined(TARGET_ARM64) + +#if defined(TARGET_LOONGARCH64) +// Disable emitDispIns by default +CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) +#endif // defined(TARGET_LOONGARCH64) #endif // DEBUG CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. 
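For reference, the base encodings in the instrsloongarch64.h table above are the instruction words with all register fields zeroed. Assuming the standard LoongArch64 3R field layout from the ISA manual (rd in bits [4:0], rj in bits [9:5], rk in bits [14:10]), an emitter forms a complete instruction word by OR-ing the register numbers into the base value. The helper below is only an illustrative sketch under that assumption, not code from this change:

// Illustrative sketch (not part of the patch): compose a 3R-format LoongArch64
// instruction word from a base encoding in the INST table and three register numbers.
static unsigned encodeFormat3R(unsigned baseEncoding, unsigned rd, unsigned rj, unsigned rk)
{
    // Register fields per the LoongArch64 ISA: rk at bits [14:10], rj at [9:5], rd at [4:0].
    return baseEncoding | (rk << 10) | (rj << 5) | rd;
}

// Example: encodeFormat3R(0x00108000 /* add.d base */, 12, 13, 14) == 0x0010b9ac,
// i.e. the word for "add.d $r12, $r13, $r14".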
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 9590279526924..3d338ff266df7 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -888,7 +888,7 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX); assert(ehGetDsc(finallyIndex)->HasFinallyHandler()); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion); #else *inTryRegion = true; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 8145cbcf2b6b6..4b2ce440126d9 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -659,7 +659,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un lvaSetClass(varDscInfo->varNum, clsHnd); } - // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers + // For ARM, ARM64, LOONGARCH64, and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); var_types origArgType = argType; @@ -813,6 +813,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } #else // !TARGET_ARM + #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -873,9 +874,101 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + var_types argRegTypeInStruct1 = TYP_UNKNOWN; + var_types argRegTypeInStruct2 = TYP_UNKNOWN; + + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); + } + + if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0) + { + assert(varTypeIsStruct(argType)); + int floatNum = 0; + if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + assert(argSize <= 8); + assert(varDsc->lvExactSize <= argSize); + + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + + argRegTypeInStruct1 = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + floatNum = 2; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else if ((floatFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) + { + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ?
TYP_LONG : TYP_INT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + + assert((floatNum == 1) || (floatNum == 2)); + + if (!canPassArgInRegisters) + { + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. + varDscInfo->setAllRegArgUsed(TYP_DOUBLE); + canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); + + argRegTypeInStruct1 = TYP_UNKNOWN; + argRegTypeInStruct2 = TYP_UNKNOWN; + + if (cSlotsToEnregister == 2) + { + if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1)) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. + argRegTypeInStruct1 = TYP_I_IMPL; + canPassArgInRegisters = true; + } + } + } + } + else +#endif // defined(TARGET_LOONGARCH64) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); +#if defined(TARGET_LOONGARCH64) + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. + if (!canPassArgInRegisters && varTypeIsFloating(argType)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + } + if (!canPassArgInRegisters && (cSlots > 1)) + { + // If a struct-arg which needs two registers but only one integer register available, + // it has to be split. + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); + argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + } +#endif } if (canPassArgInRegisters) @@ -905,7 +998,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + if (argRegTypeInStruct1 != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); + } + else +#endif // defined(TARGET_LOONGARCH64) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } @@ -953,6 +1052,40 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)); } } +#elif defined(TARGET_LOONGARCH64) + if (argType == TYP_STRUCT) + { + if (argRegTypeInStruct1 != TYP_UNKNOWN) + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)); + varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0; + if (argRegTypeInStruct2 != TYP_UNKNOWN) + { + unsigned secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)); + varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0; + } + else if (cSlots > 1) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. But we reserved extra 8-bytes for the whole struct. 
+ varDsc->lvIsSplit = 1; + varDsc->SetOtherArgReg(REG_STK); + varDscInfo->setAllRegArgUsed(argRegTypeInStruct1); +#if FEATURE_FASTTAILCALL + varDscInfo->stackArgSize += TARGET_POINTER_SIZE; +#endif + } + } + else + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL)); + if (cSlots == 2) + { + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); + } + } + } #else // ARM32 if (varTypeIsStruct(argType)) { @@ -1079,11 +1212,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDscInfo->setAnyFloatStackArgs(); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If we needed to use the stack in order to pass this argument then // record the fact that we have used up any remaining registers of this 'type' - // This prevents any 'backfilling' from occuring on ARM64 + // This prevents any 'backfilling' from occuring on ARM64/LoongArch64. // varDscInfo->setAllRegArgUsed(argType); @@ -1357,7 +1490,12 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #if defined(TARGET_AMD64) || defined(TARGET_ARM64) varDsc->lvIsImplicitByRef = 0; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + varDsc->lvIsImplicitByRef = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). @@ -1688,7 +1826,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * FP_REGSIZE_BYTES; #endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) #else // !FEATURE_SIMD - const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); + const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); #endif // !FEATURE_SIMD assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized @@ -1993,7 +2131,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. // TODO-ARM32 - HFA support should also eliminate the need for this. @@ -2010,7 +2148,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2326,7 +2464,7 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) compiler->compLongUsed = true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Reset the implicitByRef flag. 
fieldVarDsc->lvIsImplicitByRef = 0; @@ -2701,7 +2839,7 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) return true; } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (howToPassStruct == SPK_ByValue) { assert(type == TYP_STRUCT); @@ -2742,7 +2880,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; varDsc->lvType = impNormStructType(typeHnd, &simdBaseJitType); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Mark implicit byref struct parameters if (varDsc->lvIsParam && !varDsc->lvIsStructField) { @@ -2755,7 +2893,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool varDsc->lvIsImplicitByRef = 1; } } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if FEATURE_SIMD if (simdBaseJitType != CORINFO_TYPE_UNDEF) @@ -3733,20 +3871,20 @@ size_t LclVarDsc::lvArgStackSize() const #if defined(WINDOWS_AMD64_ABI) // Structs are either passed by reference or can be passed by value using one pointer stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // lvSize performs a roundup. stackSize = this->lvSize(); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) { // If the size is greater than 16 bytes then it will // be passed by reference. stackSize = TARGET_POINTER_SIZE; } -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI +#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 NYI("Unsupported target."); unreached(); @@ -5274,14 +5412,14 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 if (opts.IsOSR()) { @@ -5390,11 +5528,11 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/lr at the bottom, however, this will be - // below the alignment. So we should not apply the alignment adjustment to - // them. On ARM64 it turns out we always store these at +0 and +8 of the FP, + // arg space area. When we store fp/lr(ra) at the bottom, however, this will + // be below the alignment. So we should not apply the alignment adjustment to + // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. 
assert(codeGen->isFramePointerUsed()); @@ -5402,7 +5540,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#endif +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef TARGET_ARM @@ -5799,7 +5937,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, * when updating the current offset on the stack */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) #if DEBUG // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize // Also investigate why we are incrementing argOffs for X86 as this seems incorrect @@ -5907,6 +6045,17 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, varDsc->SetStackOffset(argOffs); argOffs += argSize; } + +#elif defined(TARGET_LOONGARCH64) + + if (varDsc->lvIsSplit) + { + assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK)); + // This is a split struct. It will account for an extra (8 bytes) for the whole struct. + varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); + argOffs += TARGET_POINTER_SIZE; + } + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6187,7 +6336,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } -#else // !TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + + // Subtract off FP and RA. + assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; + +#else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6198,7 +6353,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_ARM64 +#endif // !TARGET_LOONGARCH64 // (2) Account for the remainder of the frame // @@ -6284,7 +6439,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_AMD64 -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including @@ -6293,7 +6448,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (mustDoubleAlign) { @@ -6792,7 +6947,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If we have an incoming register argument that has a struct promoted field // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // @@ -6818,7 +6973,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaTable[fieldVarNum + 1].SetStackOffset(varDsc->GetStackOffset() + 4); } #endif // TARGET_ARM -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } } @@ -6923,6 +7078,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 +#if 
defined(TARGET_LOONGARCH64) + assert(isFramePointerUsed()); // Note that currently we always have a frame pointer + stkOffs -= 2 * REGSIZE_BYTES; +#endif // TARGET_LOONGARCH64 + #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { @@ -7126,9 +7286,9 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(REGSIZE_BYTES); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // The stack on ARM64 must be 16 byte aligned. + // The stack on ARM64/LoongArch64 must be 16 byte aligned. // First, align up to 8. if ((compLclFrameSize % 8) != 0) @@ -7793,11 +7953,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) compCalleeRegsPushed = CNT_CALLEE_SAVED; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters + compCalleeRegsPushed++; // we always push LR/RA. See genPushCalleeSavedRegisters #elif defined(TARGET_AMD64) if (compFloatingPointUsed) { @@ -7829,12 +7989,12 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) lvaAssignFrameOffsets(curState); unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) { calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR/RA. See genPushCalleeSavedRegisters #endif result = compLclFrameSize + calleeSavedRegMaxSz; @@ -8146,13 +8306,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* // Calculate padding unsigned padding = LCL_FLD_PADDING(lclNum); -#ifdef TARGET_ARMARCH - // We need to support alignment requirements to access memory on ARM ARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) + // We need to support alignment requirements to access memory. 
unsigned alignment = 1; pComp->codeGen->InferOpSizeAlign(lcl, &alignment); alignment = roundUp(alignment, TARGET_POINTER_SIZE); padding = roundUp(padding, alignment); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // Change the variable to a TYP_BLK if (varType != TYP_BLK) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5951452102a4e..5bf6eca5c3997 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -254,7 +254,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCast(node); break; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_BOUNDS_CHECK: ContainCheckBoundsChk(node->AsBoundsChk()); break; @@ -281,7 +281,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); @@ -361,7 +361,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); break; @@ -389,7 +389,7 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. case GT_OBJ: if (node->AsObj()->Addr()->OperIsLocalAddr()) @@ -1080,7 +1080,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf bool isOnStack = (info->GetRegNum() == REG_STK); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Mark contained when we pass struct // GT_FIELD_LIST is always marked contained when it is generated if (type == TYP_STRUCT) @@ -1461,7 +1461,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // !defined(TARGET_64BIT) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (call->IsVarargs() || comp->opts.compUseSoftFP) { // For vararg call or on armel, reg args should be all integer. @@ -1472,7 +1472,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) type = newNode->TypeGet(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 GenTree* putArg = NewPutArg(call, arg, info, type); @@ -1486,9 +1486,9 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) } } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm platform. +// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform. // // Arguments: // arg - The arg node @@ -3189,7 +3189,7 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) // GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) GenTree* relop = jtrue->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); @@ -3198,6 +3198,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) bool useJCMP = false; GenTreeFlags flags = GTF_EMPTY; +#if defined(TARGET_LOONGARCH64) + if (relop->OperIs(GT_EQ, GT_NE)) + { + // Codegen will use beq or bne. 
+ flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; + useJCMP = true; + } +#else // TARGET_ARM64 if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) { // Codegen will use cbz or cbnz in codegen which do not affect the flag register @@ -3210,6 +3218,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : GTF_EMPTY); useJCMP = true; } +#endif // TARGET_ARM64 if (useJCMP) { @@ -3226,7 +3235,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -3933,10 +3942,16 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) assert(!call->HasMultiRegRetVal()); const ClassLayout* layout = store->GetLayout(); - const var_types regType = layout->GetRegisterType(); + var_types regType = layout->GetRegisterType(); if (regType != TYP_UNDEF) { +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(call->TypeGet())) + { + regType = call->TypeGet(); + } +#endif store->ChangeType(regType); store->SetOper(GT_STOREIND); LowerStoreIndirCommon(store->AsStoreInd()); @@ -5485,7 +5500,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#ifndef TARGET_ARMARCH +#ifdef TARGET_XARCH if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. @@ -5496,7 +5511,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false, parent); } } -#endif // !TARGET_ARMARCH +#endif // TARGET_XARCH } if (node->OperIs(GT_ADD)) @@ -5613,7 +5628,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && (divisorValue >= 3)) { size_t magic; @@ -5693,7 +5708,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5901,7 +5916,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) return nullptr; } -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ssize_t magic; int shift; diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 1a0bb8b5ed992..09d716ac1b5c7 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -161,7 +161,7 @@ class Lowering final : public Phase void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type); void LowerArg(GenTreeCall* call, GenTree** ppTree); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) GenTree* LowerFloatArg(GenTree** pArg, fgArgTabEntry* info); GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); #endif diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp new file mode 100644 index 0000000000000..78ac528ba4c64 --- /dev/null +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -0,0 +1,829 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for LOONGARCH64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 // This file is ONLY used for LOONGARCH64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +#endif + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? +// +// Return Value: +// True if the addr fits into the range. +// +bool Lowering::IsCallTargetInRange(void* addr) +{ + // TODO-LOONGARCH64-CQ: using B/BL for optimization. + return false; +} + +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. +// +bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const +{ + if (!varTypeIsFloating(parentNode->TypeGet())) + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->AsIntCon()->ImmedValNeedsReloc(comp)) + return false; + + // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. + target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; + + switch (parentNode->OperGet()) + { + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: + NYI_LOONGARCH64("GT_CMPXCHG,GT_LOCKADD,GT_XADD"); + break; + + case GT_ADD: + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_BOUNDS_CHECK: + return emitter::isValidSimm12(immVal); + case GT_AND: + case GT_OR: + case GT_XOR: + return emitter::isValidUimm12(immVal); + case GT_JCMP: + assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); + return true; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + if (immVal == 0) + return true; + break; + + default: + break; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. +// +// Performs contaiment checks. +// +// TODO-LoongArch64-CQ: recognize GT_MULs that can be turned into MUL_LONGs, +// as those are cheaper. +// +// Arguments: +// mul - The node to lower +// +// Return Value: +// The next node to lower. +// +GenTree* Lowering::LowerMul(GenTreeOp* mul) +{ + assert(mul->OperIsMul()); + + ContainCheckMul(mul); + + return mul->gtNext; +} + +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
+// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + // Try to widen the ops if they are going into a local var. + GenTree* op1 = storeLoc->gtGetOp1(); + if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->GetLclNum(); + LclVarDsc* varDsc = comp->lvaGetDesc(varNum); + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } + if (storeLoc->OperIs(GT_STORE_LCL_FLD)) + { + // We should only encounter this for lclVars that are lvDoNotEnregister. + verifyLclFldDoNotEnregister(storeLoc->GetLclNum()); + } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. +// +void Lowering::LowerStoreIndir(GenTreeStoreInd* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. 
+// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + blkNode->SetOper(GT_STORE_BLK); + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; + if (fill == 0) + { + src->SetContained(); + } + else if (size >= REGSIZE_BYTES) + { + fill *= 0x0101010101010101LL; + src->gtType = TYP_LONG; + } + else + { + fill *= 0x01010101; + } + src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD)); + src->SetContained(); + + if (src->OperIs(GT_IND)) + { + // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained. + // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the + // address, not knowing that GT_IND is part of a block op that has containment restrictions. + src->AsIndir()->Addr()->ClearContained(); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp)); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + if (!blkNode->AsObj()->GetLayout()->HasGCPtr()) + { + blkNode->SetOper(GT_STORE_BLK); + } + else if (dstAddr->OperIsLocalAddr() && (size <= CPBLK_UNROLL_LIMIT)) + { + // If the size is small enough to unroll then we need to mark the block as non-interruptible + // to actually allow unrolling. The generated code does not report GC references loaded in the + // temporary register(s) used for copying. + blkNode->SetOper(GT_STORE_BLK); + blkNode->gtBlkOpGcUnsafe = true; + } + } + + // CopyObj or CopyBlk + if (blkNode->OperIs(GT_STORE_OBJ)) + { + assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////// + else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + if (src->OperIs(GT_IND)) + { + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); + } + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } +} + +//------------------------------------------------------------------------ +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. 
+//
+// Arguments:
+//    blkNode - the block store node
+//    size - the block size
+//    addr - the address node to try to contain
+//
+void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr)
+{
+    assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
+    assert(size < INT32_MAX);
+
+    if (addr->OperIsLocalAddr())
+    {
+        addr->SetContained();
+        return;
+    }
+
+    if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT))
+    {
+        return;
+    }
+
+    GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon();
+    ssize_t        offset     = offsetNode->IconValue();
+
+    // TODO-LoongArch64: not including the ldptr and SIMD offset which not used right now.
+    if (!emitter::isValidSimm12(offset) || !emitter::isValidSimm12(offset + static_cast<int>(size)))
+    {
+        return;
+    }
+
+    if (!IsSafeToContainMem(blkNode, addr))
+    {
+        return;
+    }
+
+    BlockRange().Remove(offsetNode);
+
+    addr->ChangeOper(GT_LEA);
+    addr->AsAddrMode()->SetIndex(nullptr);
+    addr->AsAddrMode()->SetScale(0);
+    addr->AsAddrMode()->SetOffset(static_cast<int>(offset));
+    addr->SetContained();
+}
+
+//------------------------------------------------------------------------
+// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
+//
+// Arguments:
+//    tree - GT_CAST node to be lowered
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    Casts from float/double to a smaller int type are transformed as follows:
+//    GT_CAST(float/double, byte)   = GT_CAST(GT_CAST(float/double, int32), byte)
+//    GT_CAST(float/double, sbyte)  = GT_CAST(GT_CAST(float/double, int32), sbyte)
+//    GT_CAST(float/double, int16)  = GT_CAST(GT_CAST(double/double, int32), int16)
+//    GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16)
+//
+//    Note that for the overflow conversions we still depend on helper calls and
+//    don't expect to see them here.
+//    i) GT_CAST(float/double, int type with overflow detection)
+//
+
+void Lowering::LowerCast(GenTree* tree)
+{
+    assert(tree->OperGet() == GT_CAST);
+
+    JITDUMP("LowerCast for: ");
+    DISPNODE(tree);
+    JITDUMP("\n");
+
+    GenTree*  op1     = tree->AsOp()->gtOp1;
+    var_types dstType = tree->CastToType();
+    var_types srcType = genActualType(op1->TypeGet());
+
+    if (varTypeIsFloating(srcType))
+    {
+        noway_assert(!tree->gtOverflow());
+        assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small
+                                          // int.
+    }
+
+    assert(!varTypeIsSmall(srcType));
+
+    // Now determine if we have operands that should be contained.
+    ContainCheckCast(tree->AsCast());
+}
+
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+//    tree - the node to lower
+//
+// Return Value:
+//    None.
+//
+void Lowering::LowerRotate(GenTree* tree)
+{
+    if (tree->OperGet() == GT_ROL)
+    {
+        // Convert ROL into ROR.
+ GenTree* rotatedValue = tree->AsOp()->gtOp1; + unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + GenTree* rotateLeftIndexNode = tree->AsOp()->gtOp2; + + if (rotateLeftIndexNode->IsCnsIntOrI()) + { + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; + } + else + { + GenTree* tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); + BlockRange().InsertAfter(rotateLeftIndexNode, tmp); + tree->AsOp()->gtOp2 = tmp; + } + tree->ChangeOper(GT_ROR); + } + ContainCheckShiftRotate(tree->AsOp()); +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ + NYI_LOONGARCH64("LowerSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsic"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::IsValidConstForMovImm: Determines if the given node can be replaced by a mov/fmov immediate instruction +// +// Arguments: +// node - The hardware intrinsic node. +// +// Returns: +// true if the node can be replaced by a mov/fmov immediate instruction; otherwise, false +// +// IMPORTANT: +// This check may end up modifying node->gtOp1 if it is a cast node that can be removed +bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("IsValidConstForMovImm"); + return false; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NYI_LOONGARCH64("LowerHWIntrinsicCmpOp"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsicCreate"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicDot: Lowers a Vector64 or Vector128 Dot call +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("LowerHWIntrinsicDot"); +} + +#endif // FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Containment analysis +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// ContainCheckCallOperands: Determine whether operands of a call should be contained. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::ContainCheckCallOperands(GenTreeCall* call) +{ + // There are no contained operands for LoongArch64. +} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) +{ + GenTree* src = node->Data(); + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); + } + + ContainCheckIndir(node); +} + +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. +// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ + // If this is the rhs of a block copy it will be handled when we handle the store. + if (indirNode->TypeGet() == TYP_STRUCT) + { + return; + } + +#ifdef FEATURE_SIMD + NYI_LOONGARCH64("ContainCheckIndir-SIMD"); +#endif // FEATURE_SIMD + + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + { + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // These nodes go into an addr mode: + // - GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR is a stack addr mode. + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_CLS_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } +} + +//------------------------------------------------------------------------ +// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckBinary(GenTreeOp* node) +{ + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(node, node->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckMul: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckMul(GenTreeOp* node) +{ + ContainCheckBinary(node); +} + +//------------------------------------------------------------------------ +// ContainCheckDivOrMod: determine which operands of a div/mod should be contained. 
+// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckDivOrMod(GenTreeOp* node) +{ + assert(node->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); +} + +//------------------------------------------------------------------------ +// ContainCheckShiftRotate: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckShiftRotate(GenTreeOp* node) +{ + GenTree* shiftBy = node->gtOp2; + assert(node->OperIsShiftOrRotate()); + + if (shiftBy->IsCnsIntOrI()) + { + MakeSrcContained(node, shiftBy); + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreLoc: determine whether the source of a STORE_LCL* should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const +{ + assert(storeLoc->OperIsLocalStore()); + GenTree* op1 = storeLoc->gtGetOp1(); + + if (op1->OperIs(GT_BITCAST)) + { + // If we know that the source of the bitcast will be in a register, then we can make + // the bitcast itself contained. This will allow us to store directly from the other + // type if this node doesn't get a register. + GenTree* bitCastSrc = op1->gtGetOp1(); + if (!bitCastSrc->isContained() && !bitCastSrc->IsRegOptional()) + { + op1->SetContained(); + return; + } + } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + { + // For an InitBlk we want op1 to be contained + MakeSrcContained(storeLoc, op1); + if (op1->IsSIMDZero()) + { + MakeSrcContained(op1, op1->gtGetOp1()); + } + } + return; + } +#endif // FEATURE_SIMD + if (IsContainableImmed(storeLoc, op1)) + { + MakeSrcContained(storeLoc, op1); + } + + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + var_types type = varDsc->GetRegisterType(storeLoc); + if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) + { + MakeSrcContained(storeLoc, op1); + } +} + +//------------------------------------------------------------------------ +// ContainCheckCast: determine whether the source of a CAST node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCast(GenTreeCast* node) +{ + // There are no contained operands for LoongArch64. +} + +//------------------------------------------------------------------------ +// ContainCheckCompare: determine whether the sources of a compare node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCompare(GenTreeOp* cmp) +{ + CheckImmedAndMakeContained(cmp, cmp->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckBoundsChk: determine whether any source of a bounds check node should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) +{ + assert(node->OperIs(GT_BOUNDS_CHECK)); + if (!CheckImmedAndMakeContained(node, node->GetIndex())) + { + CheckImmedAndMakeContained(node, node->GetArrayLength()); + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) +{ + NYI_LOONGARCH64("ContainCheckSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_LOONGARCH64("ContainCheckHWIntrinsic"); +} +#endif // FEATURE_HW_INTRINSICS + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2a85c814adcb6..dbebd44596218 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -703,8 +703,10 @@ LinearScan::LinearScan(Compiler* theCompiler) enregisterLocalVars = compiler->compEnregLocals(); #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); +#elif TARGET_LOONGARCH64 + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -1571,11 +1573,19 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: + { // TODO-1stClassStructs: support vars with GC pointers. The issue is that such // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. - return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args within a struct maybe passed by integer register + // when no float register left but free integer register. + isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); +#endif + return isRegCandidate; + } case TYP_UNDEF: case TYP_UNKNOWN: @@ -2576,7 +2586,7 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Determine whether we need to reserve a register for large lclVar offsets. 
if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT)) { @@ -2586,7 +2596,7 @@ void LinearScan::setFrameType() JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); removeMask |= RBM_OPT_RSVD; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0)) { @@ -2652,11 +2662,24 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* assert(refPosition->getInterval() == currentInterval); RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; - +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. + if ((candidates & allRegs(regType)) != RBM_NONE) + { + return regType; + } + else + { + assert((regType == TYP_DOUBLE) || (regType == TYP_FLOAT)); + assert((candidates & allRegs(TYP_I_IMPL)) != RBM_NONE); + return TYP_I_IMPL; + } +#else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; +#endif } - //------------------------------------------------------------------------ // isMatchingConstant: Check to see whether a given register contains the constant referenced // by the given RefPosition @@ -7684,7 +7707,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Next, if this blocks ends with a JCMP, we have to make sure: // 1. Not to copy into the register that JCMP uses // e.g. JCMP w21, BRANCH @@ -7722,7 +7745,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; regMaskTP sameWriteRegs = RBM_NONE; @@ -7797,12 +7820,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex)) { sameToReg = REG_NA; } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If the var is live only at those blocks connected by a split edge and not live-in at some of the // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 56f00145b68ba..86978763a2bf3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -762,6 +762,9 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +#elif defined(TARGET_LOONGARCH64) + static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -1003,13 +1006,16 @@ class LinearScan : public LinearScanInterface bool isUse); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For AMD64 on SystemV machines. This method // is called as replacement for raUpdateRegStateForArg // that is used on Windows. 
On System V systems a struct can be passed // partially using registers from the 2 register files. - void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) + // + // For LoongArch64's ABI, a struct can be passed + // partially using registers from the 2 register files. + void UpdateRegStateForStructArg(LclVarDsc* argDsc); +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 04df846e367e7..6d2a83a338284 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,7 +593,14 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); +#ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. + assert((regType(theInterval->registerType) == FloatRegisterType) || + (allRegs(theInterval->registerType) & mask) != 0); +#else assert((allRegs(theInterval->registerType) & mask) != 0); +#endif } RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -1987,9 +1994,11 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. +// UpdateRegStateForStructArg: +// Sets the register state for an argument of type STRUCT. +// This is shared between with AMD64's SystemV systems and LoongArch64-ABI. // // Arguments: // argDsc - the LclVarDsc for the argument of interest @@ -1998,7 +2007,7 @@ void LinearScan::insertZeroInitRefPositions() // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp // for how state for argument is updated for unix non-structs and Windows AMD64 structs. // -void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) { assert(varTypeIsStruct(argDsc)); RegState* intRegState = &compiler->codeGen->intRegState; @@ -2033,7 +2042,7 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) } } -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate @@ -2056,15 +2065,15 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) // void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { -#if defined(UNIX_AMD64_ABI) - // For System V AMD64 calls the argDsc can have 2 registers (for structs.) - // Handle them here. +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) + // For SystemV-AMD64 and LoongArch64 calls the argDsc + // can have 2 registers (for structs.). Handle them here. 
if (varTypeIsStruct(argDsc)) { - unixAmd64UpdateRegStateForArg(argDsc); + UpdateRegStateForStructArg(argDsc); } else -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; @@ -3976,6 +3985,13 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; +#elif defined(TARGET_LOONGARCH64) + // the 'addr' goes into t6 (REG_WRITE_BARRIER_DST) + // the 'src' goes into t7 (REG_WRITE_BARRIER_SRC) + // + addrCandidates = RBM_WRITE_BARRIER_DST; + srcCandidates = RBM_WRITE_BARRIER_SRC; + #elif defined(TARGET_X86) && NOGC_WRITE_BARRIERS bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp new file mode 100644 index 0000000000000..2f259f7efffbd --- /dev/null +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -0,0 +1,1325 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for LOONGARCH64 XX +XX XX +XX This encapsulates all the logic for setting register requirements for XX +XX the LOONGARCH64 architecture. XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" + +//------------------------------------------------------------------------ +// BuildNode: Build the RefPositions for for a node +// +// Arguments: +// treeNode - the node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Preconditions: +// LSRA Has been initialized. +// +// Postconditions: +// RefPositions have been built for all the register defs and uses required +// for this node. +// +int LinearScan::BuildNode(GenTree* tree) +{ + assert(!tree->isContained()); + int srcCount = 0; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; + regMaskTP killMask = RBM_NONE; + bool isLocalDefUse = false; + + // Reset the build-related members of LinearScan. + clearBuildState(); + + // Set the default dstCount. This may be modified below. + if (tree->IsValue()) + { + dstCount = 1; + if (tree->IsUnusedValue()) + { + isLocalDefUse = true; + } + } + else + { + dstCount = 0; + } + + switch (tree->OperGet()) + { + default: + srcCount = BuildSimple(tree); + break; + + case GT_LCL_VAR: + // We make a final determination about whether a GT_LCL_VAR is a candidate or contained + // after liveness. In either case we don't build any uses or defs. Otherwise, this is a + // load of a stack-based local into a register and we'll fall through to the general + // local case below. + if (checkContainedOrCandidateLclVar(tree->AsLclVar())) + { + return 0; + } + FALLTHROUGH; + case GT_LCL_FLD: + { + srcCount = 0; +#ifdef FEATURE_SIMD + // Need an additional register to read upper 4 bytes of Vector3. 
+ if (tree->TypeGet() == TYP_SIMD12) + { + // We need an internal register different from targetReg in which 'tree' produces its result + // because both targetReg and internal reg will be in use at the same time. + buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + } +#endif + BuildDef(tree); + } + break; + + case GT_STORE_LCL_VAR: + if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) + { + dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt; + } + FALLTHROUGH; + + case GT_STORE_LCL_FLD: + srcCount = BuildStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_FIELD_LIST: + // These should always be contained. We don't correctly allocate or + // generate code for a non-contained GT_FIELD_LIST. + noway_assert(!"Non-contained GT_FIELD_LIST"); + srcCount = 0; + break; + + case GT_ARGPLACE: + case GT_NO_OP: + case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_PROF_HOOK: + srcCount = 0; + assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_START_PREEMPTGC: + // This kills GC refs in callee save regs + srcCount = 0; + assert(dstCount == 0); + BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + break; + + case GT_CNS_DBL: + { + // There is no instruction for loading float/double imm directly into FPR. + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + FALLTHROUGH; + + case GT_CNS_INT: + { + srcCount = 0; + assert(dstCount == 1); + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + } + break; + + case GT_BOX: + case GT_COMMA: + case GT_QMARK: + case GT_COLON: + case GT_CLS_VAR: + case GT_ADDR: + srcCount = 0; + assert(dstCount == 0); + unreached(); + break; + + case GT_RETURN: + srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_RETFILT: + assert(dstCount == 0); + if (tree->TypeGet() == TYP_VOID) + { + srcCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + srcCount = 1; + BuildUse(tree->gtGetOp1(), RBM_INTRET); + } + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child. + srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) + { + assert(dstCount == 1); + BuildDef(tree); + } + else + { + assert(dstCount == 0); + } + break; + + case GT_KEEPALIVE: + assert(dstCount == 0); + srcCount = BuildOperandUses(tree->gtGetOp1()); + break; + + case GT_JTRUE: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_JMP: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. 
+ srcCount = 0; + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_SWITCH_TABLE: + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 0); + break; + + case GT_ASG: + noway_assert(!"We should never hit any assignment operator in lowering"); + srcCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); + } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_AND: + case GT_AND_NOT: + case GT_OR: + case GT_XOR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_RETURNTRAP: + // this just turns into a compare of its child with an int + // + a conditional call + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + } + break; + + case GT_INTRINSIC: + { + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); + + // Both operand and its result must be of the same floating point type. 
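+            // Illustrative note (an assumption about the matching codegen, not stated in this
+            // change): each of these math intrinsics is expected to be handled by the FP unit
+            // directly, e.g. NI_System_Math_Sqrt via fsqrt.d/fsqrt.s and NI_System_Math_Abs via
+            // fabs.d/fabs.s, which is why a single use and a single def with no internal
+            // registers are built here.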
+            GenTree* op1 = tree->gtGetOp1();
+            assert(varTypeIsFloating(op1));
+            assert(op1->TypeGet() == tree->TypeGet());
+
+            BuildUse(op1);
+            srcCount = 1;
+            assert(dstCount == 1);
+            BuildDef(tree);
+        }
+        break;
+
+#ifdef FEATURE_SIMD
+        case GT_SIMD:
+            srcCount = BuildSIMD(tree->AsSIMD());
+            break;
+#endif // FEATURE_SIMD
+
+#ifdef FEATURE_HW_INTRINSICS
+        case GT_HWINTRINSIC:
+            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
+            break;
+#endif // FEATURE_HW_INTRINSICS
+
+        case GT_CAST:
+            assert(dstCount == 1);
+            srcCount = BuildCast(tree->AsCast());
+            break;
+
+        case GT_NEG:
+        case GT_NOT:
+            BuildUse(tree->gtGetOp1());
+            srcCount = 1;
+            assert(dstCount == 1);
+            BuildDef(tree);
+            break;
+
+        case GT_EQ:
+        case GT_NE:
+        case GT_LT:
+        case GT_LE:
+        case GT_GE:
+        case GT_GT:
+        case GT_JCMP:
+            srcCount = BuildCmp(tree);
+            break;
+
+        case GT_CKFINITE:
+            srcCount = 1;
+            assert(dstCount == 1);
+            buildInternalIntRegisterDefForNode(tree);
+            BuildUse(tree->gtGetOp1());
+            BuildDef(tree);
+            buildInternalRegisterUses();
+            break;
+
+        case GT_CMPXCHG:
+        {
+            NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----");
+        }
+        break;
+
+        case GT_LOCKADD:
+        case GT_XORR:
+        case GT_XAND:
+        case GT_XADD:
+        case GT_XCHG:
+        {
+            NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----");
+        }
+        break;
+
+#if FEATURE_ARG_SPLIT
+        case GT_PUTARG_SPLIT:
+            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
+            dstCount = tree->AsPutArgSplit()->gtNumRegs;
+            break;
+#endif // FEATURE_ARG_SPLIT
+
+        case GT_PUTARG_STK:
+            srcCount = BuildPutArgStk(tree->AsPutArgStk());
+            break;
+
+        case GT_PUTARG_REG:
+            srcCount = BuildPutArgReg(tree->AsUnOp());
+            break;
+
+        case GT_CALL:
+            srcCount = BuildCall(tree->AsCall());
+            if (tree->AsCall()->HasMultiRegRetVal())
+            {
+                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+            }
+            break;
+
+        case GT_BLK:
+            // These should all be eliminated prior to Lowering.
+            assert(!"Non-store block node in Lowering");
+            srcCount = 0;
+            break;
+
+        case GT_STORE_BLK:
+        case GT_STORE_OBJ:
+        case GT_STORE_DYN_BLK:
+            srcCount = BuildBlockStore(tree->AsBlk());
+            break;
+
+        case GT_INIT_VAL:
+            // Always a passthrough of its child's value.
+            assert(!"INIT_VAL should always be contained");
+            srcCount = 0;
+            break;
+
+        case GT_LCLHEAP:
+        {
+            assert(dstCount == 1);
+
+            // Need a variable number of temp regs (see genLclHeap() in codegenloongarch64.cpp):
+            // Here '-' means don't care.
+            //
+            //  Size?                      Init Memory?    # temp regs
+            //   0                              -               0
+            //   const and <=UnrollLimit        -               0
+            //   const and >UnrollLimit         Yes             0
+            //   Non-const                      Yes             0
+            //   Non-const                      No              2
+            //
+
+            GenTree* size = tree->gtGetOp1();
+            if (size->IsCnsIntOrI())
+            {
+                assert(size->isContained());
+                srcCount = 0;
+
+                size_t sizeVal = size->AsIntCon()->gtIconVal;
+
+                if (sizeVal != 0)
+                {
+                    // Compute the amount of memory to properly STACK_ALIGN.
+                    // Note: The GenTree node is not updated here as it is cheap to recompute stack aligned size.
+                    // This should also help in debugging as we can examine the original size specified with
+                    // localloc.
+                    sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+
+                    // For small allocations up to 4 'st' instructions (i.e. 16 to 64 bytes of localloc)
+                    // TODO-LoongArch64: maybe use paired load/store or SIMD in future.
+                    if (sizeVal <= (REGSIZE_BYTES * 2 * 4))
+                    {
+                        // Need no internal registers
+                    }
+                    else if (!compiler->info.compInitMem)
+                    {
+                        // No need to initialize allocated stack space.
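+                        // Illustrative summary (restating the table above, not new behavior): a
+                        // constant size below the OS page size needs no probing and thus no temps,
+                        // while a larger constant or a non-constant size without compInitMem needs
+                        // two internal registers so codegen can probe the stack one page at a time.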
+ if (sizeVal < compiler->eeGetPageSize()) + { + // Need no internal registers + } + else + { + // We need two registers: regCnt and RegTmp + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + } + } + else + { + srcCount = 1; + if (!compiler->info.compInitMem) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + + if (!size->isContained()) + { + BuildUse(size); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_BOUNDS_CHECK: + { + GenTreeBoundsChk* node = tree->AsBoundsChk(); + // Consumes arrLen & index - has no result + assert(dstCount == 0); + srcCount = BuildOperandUses(node->GetIndex()); + srcCount += BuildOperandUses(node->GetArrayLength()); + } + break; + + case GT_ARR_ELEM: + // These must have been lowered to GT_ARR_INDEX + noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_ARR_INDEX: + { + srcCount = 2; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple + // times while the result is being computed. + RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); + setDelayFree(arrObjUse); + BuildUse(tree->AsArrIndex()->IndexExpr()); + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_OFFSET: + // This consumes the offset, if any, the arrObj and the effective index, + // and produces the flattened offset for this dimension. + srcCount = 2; + if (!tree->AsArrOffs()->gtOffset->isContained()) + { + BuildUse(tree->AsArrOffs()->gtOffset); + srcCount++; + } + BuildUse(tree->AsArrOffs()->gtIndex); + BuildUse(tree->AsArrOffs()->gtArrObj); + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + int cns = lea->Offset(); + + // This LEA is instantiating an address, so we set up the srcCount here. + srcCount = 0; + if (base != nullptr) + { + srcCount++; + BuildUse(base); + } + if (index != nullptr) + { + srcCount++; + BuildUse(index); + } + assert(dstCount == 1); + + // On LOONGARCH64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the add instruction, so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_STOREIND: + { + assert(dstCount == 0); + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) + { + srcCount = BuildGCWriteBarrier(tree); + break; + } + + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) + { + BuildUse(tree->gtGetOp2()); + srcCount++; + } + } + break; + + case GT_NULLCHECK: + case GT_IND: + assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 
0 : 1)); + srcCount = BuildIndir(tree->AsIndir()); + break; + + case GT_CATCH_ARG: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_EXCEPTION_OBJECT); + break; + + case GT_INDEX_ADDR: + assert(dstCount == 1); + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + } // end switch (tree->OperGet()) + + if (tree->IsUnusedValue() && (dstCount != 0)) + { + isLocalDefUse = true; + } + // We need to be sure that we've set srcCount and dstCount appropriately + assert((dstCount < 2) || tree->IsMultiRegNode()); + assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); + assert(!tree->IsUnusedValue() || (dstCount != 0)); + assert(dstCount == tree->GetRegisterDstCount(compiler)); + return srcCount; +} + +#ifdef FEATURE_SIMD +//------------------------------------------------------------------------ +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. +// +// Arguments: +// tree - The GT_SIMD node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) +{ + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +//------------------------------------------------------------------------ +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. +// +// Arguments: +// tree - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) +{ + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; +} +#endif + +//------------------------------------------------------------------------ +// BuildIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or block gentree node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildIndir(GenTreeIndir* indirTree) +{ + // struct typed indirs are expected only on rhs of a block copy, + // but in this case they must be contained. + assert(indirTree->TypeGet() != TYP_STRUCT); + + GenTree* addr = indirTree->Addr(); + GenTree* index = nullptr; + int cns = 0; + + if (addr->isContained()) + { + if (addr->OperGet() == GT_LEA) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On LOONGARCH we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + } + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(indirTree); + } + } + +#ifdef FEATURE_SIMD + if (indirTree->TypeGet() == TYP_SIMD12) + { + // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). + assert(!addr->isContained()); + + // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. 
+ // To assemble the vector properly we would need an additional int register + buildInternalIntRegisterDefForNode(indirTree); + } +#endif // FEATURE_SIMD + + int srcCount = BuildIndirUses(indirTree); + buildInternalRegisterUses(); + + if (!indirTree->OperIs(GT_STOREIND, GT_NULLCHECK)) + { + BuildDef(indirTree); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildCall(GenTreeCall* call) +{ + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; + + int srcCount = 0; + int dstCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + dstCount = 1; + } + } + + GenTree* ctrlExpr = call->gtControlExpr; + regMaskTP ctrlExprCandidates = RBM_NONE; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + // Fast tail call - make sure that call target is always computed in volatile registers + // that will not be overridden by epilog sequence. + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); + } + } + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. + regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); + } + + RegisterType registerType = call->TypeGet(); + + // Set destination candidates for return value of the call. + + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + dstCandidates = retTypeDesc->GetABIReturnRegs(); + } + else if (varTypeUsesFloatArgReg(registerType)) + { + dstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + dstCandidates = RBM_LNGRET; + } + else + { + dstCandidates = RBM_INTRET; + } + + // First, count reg args + // Each register argument corresponds to one source. + bool callHasFloatRegArgs = false; + + for (GenTreeCall::Use& arg : call->LateArgs()) + { + GenTree* argNode = arg.GetNode(); + +#ifdef DEBUG + // During Build, we only use the ArgTabEntry for validation, + // as getting it is rather expensive. 
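+        // Illustrative note (assumes the standard LoongArch64 LP64D convention, matching the
+        // intArgRegs/fltArgRegs tables in targetloongarch64.cpp): by this point every late arg
+        // already has its register assigned, e.g. the first integer/pointer args in a0..a7 and
+        // the first floating-point args in f0..f7, so the code below only builds uses fixed to
+        // those precomputed registers.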
+ fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + regNumber argReg = curArgTabEntry->GetRegNum(); + assert(curArgTabEntry != nullptr); +#endif + + if (argNode->gtOper == GT_PUTARG_STK) + { + // late arg that is not passed in a register + assert(curArgTabEntry->GetRegNum() == REG_STK); + // These should never be contained. + assert(!argNode->isContained()); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + assert(argNode->isContained()); + + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { +#ifdef DEBUG + assert(use.GetNode()->OperIs(GT_PUTARG_REG)); + assert(use.GetNode()->GetRegNum() == argReg); + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); +#endif + BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + srcCount++; + } + } +#if FEATURE_ARG_SPLIT + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == curArgTabEntry->numRegs); + for (unsigned int i = 0; i < regCount; i++) + { + BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + } + srcCount += regCount; + } +#endif // FEATURE_ARG_SPLIT + else + { + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->GetRegNum() == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + { + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + srcCount++; + } + } + } + +#ifdef DEBUG + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* arg = use.GetNode(); + + // Skip arguments that have been moved to the Late Arg list + if ((arg->gtFlags & GTF_LATE_ARG) == 0) + { + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry != nullptr); +#if FEATURE_ARG_SPLIT + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. + assert(arg->OperGet() != GT_PUTARG_SPLIT); +#endif // FEATURE_ARG_SPLIT + if (arg->gtOper == GT_PUTARG_STK) + { + assert(curArgTabEntry->GetRegNum() == REG_STK); + } + else + { + assert(!arg->IsValue() || arg->IsUnusedValue()); + } + } + } +#endif // DEBUG + + // If it is a fast tail call, it is already preferenced to use IP0. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by LOONGARCH64 ABI. + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + } + + if (ctrlExpr != nullptr) + { + BuildUse(ctrlExpr, ctrlExprCandidates); + srcCount++; + } + + buildInternalRegisterUses(); + + // Now generate defs and kills. 
+ regMaskTP killMask = getKillSetForCall(call); + BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTree* putArgChild = argNode->gtGetOp1(); + + int srcCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperIs(GT_FIELD_LIST)) + { + assert(putArgChild->isContained()); + // We consume all of the items in the GT_FIELD_LIST + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + BuildUse(use.GetNode()); + srcCount++; + } + } + else + { + // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. + buildInternalIntRegisterDefForNode(argNode); + buildInternalIntRegisterDefForNode(argNode); + + if (putArgChild->OperGet() == GT_OBJ) + { + assert(putArgChild->isContained()); + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation, and there are no source registers. + // + assert(objChild->isContained()); + } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + srcCount = BuildOperandUses(objChild); + } + } + else + { + // No source registers. + putArgChild->OperIs(GT_LCL_VAR); + } + } + } + else + { + assert(!putArgChild->isContained()); + srcCount = BuildOperandUses(putArgChild); + } + buildInternalRegisterUses(); + return srcCount; +} + +#if FEATURE_ARG_SPLIT +//------------------------------------------------------------------------ +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a GT_PUTARG_SPLIT node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained +// +int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) +{ + int srcCount = 0; + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTree* putArgChild = argNode->gtGetOp1(); + + // Registers for split argument corresponds to source + int dstCount = argNode->gtNumRegs; + + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argNode->gtNumRegs; i++) + { + regNumber thisArgReg = (regNumber)((unsigned)argReg + i); + argMask |= genRegMask(thisArgReg); + argNode->SetRegNumByIdx(thisArgReg, i); + } + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. Store to target slot and move to target registers (destination) from source + // + unsigned sourceRegCount = 0; + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. 
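+        // Illustrative example (hypothetical registers): for a GT_PUTARG_SPLIT with
+        // gtNumRegs == 2 whose first register is a6, the first field below is preferenced to a6
+        // and the second to a7; any remaining fields get an empty mask because they are passed
+        // in stack slots rather than registers.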
+ + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + GenTree* node = use.GetNode(); + assert(!node->isContained()); + // The only multi-reg nodes we should see are OperIsMultiRegOp() + assert(!node->IsMultiRegNode()); + + // Consume all the registers, setting the appropriate register mask for the ones that + // go into registers. + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) + { + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + } + sourceRegCount++; + BuildUse(node, sourceMask, 0); + } + srcCount += sourceRegCount; + assert(putArgChild->isContained()); + } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); + + // We can use a ld/st sequence so we need an internal register + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + assert(objChild->isContained()); + } + else + { + srcCount = BuildIndirUses(putArgChild->AsIndir()); + } + assert(putArgChild->isContained()); + } + buildInternalRegisterUses(); + BuildDefs(argNode, dstCount, argMask); + return srcCount; +} +#endif // FEATURE_ARG_SPLIT + +//------------------------------------------------------------------------ +// BuildBlockStore: Build the RefPositions for a block store node. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + GenTree* srcAddrOrFill = nullptr; + + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP srcRegMask = RBM_NONE; + regMaskTP sizeRegMask = RBM_NONE; + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + srcAddrOrFill = src; + + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + { + if (dstAddr->isContained()) + { + // Since the dstAddr is contained the address will be computed in CodeGen. + // This might require an integer register to store the value. + buildInternalIntRegisterDefForNode(blkNode); + } + + const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr(); + + if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES)) + { + // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. + // here just used a temp register. + buildInternalIntRegisterDefForNode(blkNode); + } + } + break; + + case GenTreeBlk::BlkOpKindHelper: + assert(!src->isContained()); + dstAddrRegMask = RBM_ARG_0; + srcRegMask = RBM_ARG_1; + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + else + { + if (src->OperIs(GT_IND)) + { + assert(src->isContained()); + srcAddrOrFill = src->AsIndir()->Addr(); + } + + if (blkNode->OperIs(GT_STORE_OBJ)) + { + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. 
+ // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + + if (size >= 2 * REGSIZE_BYTES) + { + // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance + // so we need to reserve an extra internal register. + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + } + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + } + } + else + { + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + buildInternalIntRegisterDefForNode(blkNode); + break; + + case GenTreeBlk::BlkOpKindHelper: + dstAddrRegMask = RBM_ARG_0; + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_ARG_1; + } + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE)) + { + // Reserve a temp register for the block size argument. + buildInternalIntRegisterDefForNode(blkNode, sizeRegMask); + } + + int useCount = 0; + + if (!dstAddr->isContained()) + { + useCount++; + BuildUse(dstAddr, dstAddrRegMask); + } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } + + if (srcAddrOrFill != nullptr) + { + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } + } + + if (blkNode->OperIs(GT_STORE_DYN_BLK)) + { + useCount++; + BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask); + } + + buildInternalRegisterUses(); + regMaskTP killMask = getKillSetForBlockStore(blkNode); + BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; +} + +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildCast(GenTreeCast* cast) +{ + int srcCount = BuildOperandUses(cast->CastOp()); + BuildDef(cast); + + return srcCount; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 68346bb8866c9..19deba41f7657 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -180,7 +180,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Arm64: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. 
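+            // The same reasoning is assumed to apply to LoongArch64: only overflow-checking
+            // float->integer casts go through the (double-typed) helper, so a TYP_FLOAT source is
+            // widened to double first, while non-overflow conversions are left to the hardware
+            // (see the TARGET_LOONGARCH64 case further down).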
&& tree->gtOverflow() @@ -215,7 +215,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#ifdef TARGET_ARM64 // ARM64 supports all non-overflow checking conversions directly. +// ARM64 and LoongArch64 optimize all non-overflow checking conversions +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return nullptr; #else switch (dstType) @@ -243,7 +244,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) default: unreached(); } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } else { @@ -861,6 +862,11 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); curArgTabEntry->SetByteOffset(0); +#ifdef TARGET_LOONGARCH64 + curArgTabEntry->structFloatFieldType[0] = TYP_UNDEF; + curArgTabEntry->structFloatFieldType[1] = TYP_UNDEF; +#endif + hasRegArgs = true; if (argCount >= argTableSize) { @@ -914,6 +920,34 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned } #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber otherRegNum) +{ + fgArgTabEntry* curArgTabEntry = + AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + assert(curArgTabEntry != nullptr); + + INDEBUG(curArgTabEntry->checkIsStruct();) + assert(numRegs <= 2); + if (numRegs == 2) + { + curArgTabEntry->setRegNum(1, otherRegNum); + } + + return curArgTabEntry; +} +#endif // defined(TARGET_LOONGARCH64) + fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -1785,7 +1819,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) if (varTypeIsStruct(type)) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Can this type be passed as a primitive type? // If so, the following call will return the corresponding primitive type. @@ -1839,7 +1873,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) addrNode = arg; #if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(type)); if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) { @@ -1853,11 +1887,11 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #else // Always create an Obj of the temp to use it as a call argument. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); -#endif // !TARGET_ARM64 +#endif // !(TARGET_ARM64 || TARGET_LOONGARCH64) #endif // FEATURE_MULTIREG_ARGS } -#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) // other targets, we pass the struct by value assert(varTypeIsStruct(type)); @@ -1868,7 +1902,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. 
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); -#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) } // (varTypeIsStruct(type)) @@ -1993,8 +2027,18 @@ void fgArgInfo::EvalArgsToTemps() if (setupArg->OperIsCopyBlkOp()) { setupArg = compiler->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. + if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) +#else if (lclVarType == TYP_STRUCT) +#endif { // This scalar LclVar widening step is only performed for ARM architectures. // @@ -2004,7 +2048,7 @@ void fgArgInfo::EvalArgsToTemps() scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) } // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => @@ -2909,6 +2953,12 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + + assert(!callIsVararg && !isHfaArg); + passUsingFloatRegs = varTypeUsesFloatReg(argx); + DWORD floatFieldFlags = STRUCT_NO_FLOAT_FIELD; + #else #error Unsupported or unset target architecture #endif // TARGET* @@ -2972,14 +3022,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (isStructArg) { if (isHfaArg) @@ -3037,6 +3087,42 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) structPassingKind howToPassStruct; structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); passStructByRef = (howToPassStruct == SPK_ByReference); +#if defined(TARGET_LOONGARCH64) + if (!passStructByRef) + { + assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); + + floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + + passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; + compFloatingPointUsed |= passUsingFloatRegs; + + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) != 0) + { + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". 
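+                    // Illustrative example (hypothetical type): for
+                    //     struct Vec2f { float x; float y; };   // 8 bytes, two float fields
+                    // floatFieldFlags has STRUCT_FLOAT_FIELD_ONLY_TWO set, so the arg must remain
+                    // TYP_STRUCT and be treated as a multi-reg arg passed in two FP registers
+                    // rather than being retyped to a single 8-byte integer.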
+ // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. + structBaseType = TYP_STRUCT; + } + + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) + { + size = 1; + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + size = 2; + } + } + else // if (passStructByRef) + { + size = 1; + byteSize = TARGET_POINTER_SIZE; + } +#else if (howToPassStruct == SPK_ByReference) { byteSize = TARGET_POINTER_SIZE; @@ -3065,6 +3151,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { size = 1; } +#endif } const var_types argType = args->GetNode()->TypeGet(); @@ -3085,6 +3172,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) argAlignBytes = eeGetArgSizeAlignment(argType, isFloatHfa); } +#ifdef TARGET_LOONGARCH64 + regNumber nextOtherRegNum = REG_STK; +#endif // // Figure out if the argument will be passed in a register. // @@ -3179,7 +3269,74 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } -#else // not TARGET_ARM or TARGET_ARM64 + +#elif defined(TARGET_LOONGARCH64) + if (passUsingFloatRegs) + { + // Check if the last register needed is still in the fp argument register range. + passUsingFloatRegs = isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + if (isStructArg) + { + if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && + passUsingFloatRegs) + { + passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + } + + if (!passUsingFloatRegs) + { + size = structSize > 8 ? 2 : 1; + floatFieldFlags = 0; + } + else if (passUsingFloatRegs) + { + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) + { + assert(size == 1); + size = 2; + passUsingFloatRegs = false; + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + assert(size == 1); + size = 2; + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); + } + } + } + + assert(!isHfaArg); // LoongArch64 does not support HFA. + } + + // if we run out of floating-point argument registers, try the int argument registers. + if (!isRegArg) + { + // Check if the last register needed is still in the int argument register range. + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + if (!passUsingFloatRegs && isRegArg && (size > 1)) + { + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + } + + // Did we run out of registers when we had a 16-byte struct (size===2) ? + // (i.e we only have one register remaining but we needed two registers to pass this arg) + // + if (!isRegArg && (size > 1)) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
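+                // Illustrative example (hypothetical layout): a 16-byte struct such as
+                //     struct Pair { long a; long b; };   // size == 2 slots
+                // that arrives when only a7 is still free is split: the first 8 bytes go in a7
+                // and the second 8 bytes go to the stack, which is what REG_STK in
+                // nextOtherRegNum expresses.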
+ nextOtherRegNum = REG_STK; + } + } +#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 #if defined(UNIX_AMD64_ABI) @@ -3331,15 +3488,73 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) - UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); // Set up the next intArgRegNum and fltArgRegNum values. if (!isBackFilled) { +#if defined(TARGET_LOONGARCH64) + // Increment intArgRegNum by 'size' registers + if (!isNonStandard) + { + if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) + { + assert(!passUsingFloatRegs); + assert(size == 2); + intArgRegNum = maxRegArgs; + } + else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) + { + if (passUsingFloatRegs) + { + fltArgRegNum += 1; + } + else + { + intArgRegNum += size; + } + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + structBaseType = structSize == 8 ? TYP_DOUBLE : TYP_FLOAT; + fltArgRegNum += 1; + newArgEntry->structFloatFieldType[0] = structBaseType; + } + else if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) != 0) + { + fltArgRegNum += 1; + intArgRegNum += 1; + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + } + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) + { + fltArgRegNum += 2; + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } + } +#else + #if defined(UNIX_AMD64_ABI) if (isStructArg) { @@ -3388,6 +3603,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } +#endif // defined(TARGET_LOONGARCH64) } } else // We have an argument that is not passed in a register @@ -3635,9 +3851,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } else // This is passed by value. { +#if defined(TARGET_LOONGARCH64) + // For LoongArch64 the struct {float a; float b;} can be passed by two float registers. + DEBUG_ARG_SLOTS_ASSERT((size == roundupSize / TARGET_POINTER_SIZE) || + ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && + (size == 2) && (size == argEntry->numRegs))); +#else // Check to see if we can transform this into load of a primitive type. 
// 'size' must be the number of pointer sized items DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); +#endif structSize = originalSize; unsigned passingSize = originalSize; @@ -3658,7 +3881,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); } -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can // only transform in that case if the arg is a local. // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 @@ -3668,7 +3891,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (lclVar != nullptr); passingSize = genTypeSize(structBaseType); } -#endif // TARGET_ARM64 || UNIX_AMD64_ABI +#endif // TARGET_ARM64 || UNIX_AMD64_ABI || TARGET_LOONGARCH64 } if (!canTransform) @@ -3706,7 +3929,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((passingSize != structSize) && (lclVar == nullptr)) { copyBlkClass = objClass; @@ -3829,7 +4052,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); } -#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) +#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. @@ -4238,13 +4461,12 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // this also forces the struct to be stack allocated into the local frame. // For the GT_OBJ case will clone the address expression and generate two (or more) // indirections. -// Currently the implementation handles ARM64/ARM and will NYI for other architectures. // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { assert(varTypeIsStruct(arg->TypeGet())); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -4361,19 +4583,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); BYTE gcPtrs[MAX_ARG_REG_COUNT]; - elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
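+        // Illustrative note (restating the case above): such a struct is only 8 bytes, so the
+        // size-based elemCount would be 1, yet the ABI gives it two argument registers (one GPR
+        // for 'a', one FPR for 'b'); forcing elemCount to numRegs makes the field list built
+        // below carry one element per register.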
+ if (fgEntryPtr->numRegs == 2) + { + elemCount = 2; + } +#endif for (unsigned inx = 0; inx < elemCount; inx++) { -#ifdef UNIX_AMD64_ABI +#if defined(UNIX_AMD64_ABI) if (gcPtrs[inx] == TYPE_GC_NONE) { type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], fgEntryPtr->structDesc.eightByteSizes[inx]); } else -#endif // UNIX_AMD64_ABI +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(fgEntryPtr->structFloatFieldType[inx]) || + (genTypeSize(fgEntryPtr->structFloatFieldType[inx]) == 4)) + { + type[inx] = fgEntryPtr->structFloatFieldType[inx]; + } + else +#endif // TARGET_LOONGARCH64 { type[inx] = getJitGCType(gcPtrs[inx]); } @@ -4386,8 +4624,14 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // We can safely widen this to aligned bytes since we are loading from // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and // lives in the stack frame or will be a promoted field. - // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. structSize = elemCount * TARGET_POINTER_SIZE; +#endif } else // we must have a GT_OBJ { @@ -4409,11 +4653,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry case 2: type[lastElem] = TYP_SHORT; break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 4: type[lastElem] = TYP_INT; break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) default: noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); break; @@ -4517,7 +4761,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif // !UNIX_AMD64_ABI -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Is this LclVar a promoted struct with exactly 2 fields? if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa()) { @@ -4695,7 +4939,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); newArg->AddField(this, nextLclFld, offset, type[inx]); - offset += genTypeSize(type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else +#endif + { + offset += genTypeSize(type[inx]); + } } } // Are we passing a GT_OBJ struct? @@ -4745,7 +5000,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry curItem->gtFlags |= GTF_GLOB_REF; newArg->AddField(this, curItem, offset, type[inx]); - offset += genTypeSize(type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. 
`struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else +#endif + { + offset += genTypeSize(type[inx]); + } } } } @@ -12256,8 +12522,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_DIV: +#ifdef TARGET_LOONGARCH64 + case GT_MOD: +#endif if (!varTypeIsFloating(tree->gtType)) { // Codegen for this instruction needs to be able to throw two exceptions: @@ -12266,10 +12535,14 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } break; case GT_UDIV: +#ifdef TARGET_LOONGARCH64 + case GT_UMOD: +#endif // Codegen for this instruction needs to be able to throw one exception: fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); break; -#endif + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_ADD: @@ -17514,7 +17787,7 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) void Compiler::fgResetImplicitByRefRefCount() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17537,7 +17810,7 @@ void Compiler::fgResetImplicitByRefRefCount() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17551,7 +17824,7 @@ void Compiler::fgResetImplicitByRefRefCount() void Compiler::fgRetypeImplicitByRefArgs() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17750,7 +18023,7 @@ void Compiler::fgRetypeImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17763,7 +18036,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() { JITDUMP("\n*************** In fgMarkDemotedImplicitByRefArgs()\n"); -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { @@ -17824,7 +18097,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } /***************************************************************************** @@ -17834,11 +18107,11 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() */ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) { -#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) +#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) return false; -#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 bool changed = false; @@ -17873,7 +18146,7 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) } return changed; -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 
+#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 939ea56badf2c..532fa8fd40976 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -256,6 +256,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + // TODO-LOONGARCH64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog + // can handle non-frame pointer frames. + if (!result) + { + INDEBUG(reason = "Temporary LOONGARCH64 force frame pointer"); + result = true; + } +#endif // TARGET_LOONGARCH64 + #ifdef DEBUG if ((result == true) && (wbReason != nullptr)) { diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index d06bef0cea1d9..971974722eee8 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -103,6 +103,9 @@ REGDEF(STK, 16+XMMBASE, 0x0000, "STK" ) #elif defined(TARGET_ARM64) #include "registerarm64.h" +#elif defined(TARGET_LOONGARCH64) + #include "registerloongarch64.h" + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h new file mode 100644 index 0000000000000..8f3cd157016bb --- /dev/null +++ b/src/coreclr/jit/registerloongarch64.h @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +/*****************************************************************************/ +/*****************************************************************************/ +#ifndef REGDEF +#error Must define REGDEF macro before including this file +#endif +#ifndef REGALIAS +#define REGALIAS(alias, realname) +#endif + +#define RMASK(x) (1ULL << (x)) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(R0, 0, 0x0001, "zero") +REGDEF(RA, 1, 0x0002, "ra" ) +REGDEF(TP, 2, 0x0004, "tp" ) +REGDEF(SP, 3, 0x0008, "sp" ) +REGDEF(A0, 4, 0x0010, "a0" ) +REGDEF(A1, 5, 0x0020, "a1" ) +REGDEF(A2, 6, 0x0040, "a2" ) +REGDEF(A3, 7, 0x0080, "a3" ) +REGDEF(A4, 8, 0x0100, "a4" ) +REGDEF(A5, 9, 0x0200, "a5" ) +REGDEF(A6, 10, 0x0400, "a6" ) +REGDEF(A7, 11, 0x0800, "a7" ) +REGDEF(T0, 12, 0x1000, "t0" ) +REGDEF(T1, 13, 0x2000, "t1" ) +REGDEF(T2, 14, 0x4000, "t2" ) +REGDEF(T3, 15, 0x8000, "t3" ) +REGDEF(T4, 16, 0x10000, "t4" ) +REGDEF(T5, 17, 0x20000, "t5" ) +REGDEF(T6, 18, 0x40000, "t6" ) +REGDEF(T7, 19, 0x80000, "t7" ) +REGDEF(T8, 20, 0x100000, "t8" ) +REGDEF(X0, 21, 0x200000, "x0" ) +REGDEF(FP, 22, 0x400000, "fp" ) +REGDEF(S0, 23, 0x800000, "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" ) + +//NOTE for LoongArch64: +// The `REG_R21` which alias `REG_X0` is specially reserved !!! +// It should be only used with hand written assembly code and should be very careful!!! +// e.g. right now LoongArch64's backend-codegen/emit, there is usually +// a need for an extra register for cases like +// constructing a large imm or offset, saving some intermediate result +// of the overflowing check and integer-comparing result. +// Using the a specially reserved register maybe more efficient. 
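+// Illustrative examples of such uses (an assumption about codegen, not an exhaustive list):
+// materializing a 64-bit immediate with a lu12i.w/ori/lu32i.d/lu52i.d sequence, or holding a
+// stack offset that does not fit the 12-bit immediate field of a load/store instruction.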
+REGALIAS(R21, X0) + +#define FBASE 32 +#define FMASK(x) (1ULL << (FBASE+(x))) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31") + +// The registers with values 64 (NBASE) and above are not real register numbers +#define NBASE 64 + +// This must be last! +REGDEF(STK, 0+NBASE, 0x0000, "STK") + +/*****************************************************************************/ +#undef RMASK +#undef FMASK +#undef FBASE +#undef NBASE +#undef REGDEF +#undef REGALIAS +/*****************************************************************************/ + +// clang-format on diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 58439020fd693..d28a90ec36f5d 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -228,9 +228,9 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r rsMaskResvd = RBM_NONE; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) rsMaskCalleeSaved = RBM_NONE; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_ARM rsMaskPreSpillRegArg = RBM_NONE; diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 34a9bcea64629..9c1a1041eecf8 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,9 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 67bb4523b6f7c..6b2fcd5690eb6 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ 
b/src/coreclr/jit/scopeinfo.cpp @@ -295,7 +295,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies // the IR to comply with the ABI and therefore changes the type of the lclVar @@ -314,7 +314,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( this->vlType = VLT_STK_BYREF; } else -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) { this->vlType = VLT_STK; } @@ -1600,11 +1600,38 @@ void CodeGen::psiBegProlog() if (!isStructHandled) { #ifdef DEBUG +#ifdef TARGET_LOONGARCH64 + var_types regType; + if (varTypeIsStruct(lclVarDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); + if (emitter::isFloatReg(lclVarDsc->GetArgReg())) + { + regType = TYP_DOUBLE; + } + else + { + regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } + } + else + { + regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); + if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + { + // For LoongArch64's ABI, the float args may be passed by integer register. + regType = TYP_LONG; + } + } +#else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (lclVarDsc->lvIsHfaRegArg()) { regType = lclVarDsc->GetHfaType(); } +#endif assert(genMapRegNumToRegArgNum(lclVarDsc->GetArgReg(), regType) != (unsigned)-1); #endif // DEBUG diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b6ab3166e10f8..e5bf31e7f66c2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,8 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || + (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { @@ -51,6 +52,8 @@ inline bool compUnixX86Abi() #define TARGET_READABLE_NAME "ARM" #elif defined(TARGET_ARM64) #define TARGET_READABLE_NAME "ARM64" +#elif defined(TARGET_LOONGARCH64) +#define TARGET_READABLE_NAME "LOONGARCH64" #else #error Unsupported or unset target architecture #endif @@ -70,6 +73,10 @@ inline bool compUnixX86Abi() #define REGMASK_BITS 64 #define CSE_CONST_SHARED_LOW_BITS 12 +#elif defined(TARGET_LOONGARCH64) +#define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 + #else #error Unsupported or unset target architecture #endif @@ -85,7 +92,7 @@ inline bool compUnixX86Abi() // be assigned during register allocation. // REG_NA - Used to indicate that a register is either not yet assigned or not required. // -#if defined(TARGET_ARM) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) enum _regNumber_enum : unsigned { #define REGDEF(name, rnum, mask, sname) REG_##name = rnum, @@ -185,7 +192,7 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. 
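The switch to a 64-bit mask is what lets both LoongArch64 register files share one bitset: 32 integer registers occupy bits 0..31 and 32 floating-point registers occupy bits 32..63 (FBASE is 32 in registerloongarch64.h). A minimal sketch of that layout, using illustrative helper names rather than anything defined by the JIT:

// Sketch only: mirrors the RMASK/FMASK conventions from registerloongarch64.h.
typedef unsigned long long regMaskSketch;

constexpr regMaskSketch GpBit(unsigned rnum)  { return 1ULL << rnum; }        // e.g. A0 (rnum 4) -> 0x10
constexpr regMaskSketch FltBit(unsigned fnum) { return 1ULL << (32 + fnum); } // e.g. F0 -> bit 32

// Both register files can therefore live in one regMaskTP-style value:
constexpr regMaskSketch exampleMask = GpBit(4) | FltBit(0); // "a0" and "f0" together
static_assert(exampleMask == 0x100000010ULL, "a0 is bit 4, f0 is bit 32");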
-#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; @@ -237,6 +244,8 @@ typedef unsigned char regNumberSmall; #include "targetarm.h" #elif defined(TARGET_ARM64) #include "targetarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "targetloongarch64.h" #else #error Unsupported or unset target architecture #endif @@ -536,7 +545,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) assert(genIsValidFloatReg(reg)); assert((unsigned)reg < ArrLen(regMasks)); return regMasks[reg]; diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp new file mode 100644 index 0000000000000..e0097a1b62a1c --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#include "target.h" + +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; + +// clang-format off +const regNumber intArgRegs [] = {REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7}; +const regMaskTP intArgMasks[] = {RBM_A0, RBM_A1, RBM_A2, RBM_A3, RBM_A4, RBM_A5, RBM_A6, RBM_A7}; + +const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7 }; +const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7 }; +// clang-format on + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h new file mode 100644 index 0000000000000..25355994d385b --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.h @@ -0,0 +1,332 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +#if !defined(TARGET_LOONGARCH64) +#error The file should not be included for this platform. +#endif + +// NOTE for LoongArch64: +// The `REG_R21`, which aliases `REG_X0`, is specially reserved !!! +// It can be used only manually and very carefully!!! + +// clang-format off + #define CPU_LOAD_STORE_ARCH 1 + #define CPU_HAS_FP_SUPPORT 1 + #define ROUND_FLOAT 0 // Do not round intermed float expression results + #define CPU_HAS_BYTE_REGS 0 + + #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator loop unroll CpBlk. + #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator loop unroll InitBlk.
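The two unroll limits above bound the block sizes that codegen will expand into straight-line code instead of calling a helper. A hypothetical sketch of that size check (the helper name is invented for illustration; the real decision lives in lowering/codegen):

// Illustrative only; not a JIT API.
bool ShouldUnrollInitBlk(unsigned blockSizeInBytes)
{
    const unsigned initBlkUnrollLimit = 64; // mirrors INITBLK_UNROLL_LIMIT above
    return blockSizeInBytes <= initBlkUnrollLimit;
}

// e.g. a 32-byte zero-init that passes the check could become four 8-byte stores:
//   st.d  zero, dst, 0
//   st.d  zero, dst, 8
//   st.d  zero, dst, 16
//   st.d  zero, dst, 24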
+ +#ifdef FEATURE_SIMD +#pragma error("SIMD Unimplemented yet LOONGARCH") + #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned + #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls +#endif // FEATURE_SIMD + + #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog + #define FEATURE_STRUCTPROMOTE 0 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register + #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register + #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (8 bytes). + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 16 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 2 doubles) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. + #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. + #define MAX_MULTIREG_COUNT 2 // Maximum number of registers defined by a single instruction (including calls). + // This is also the maximum number of registers for a MultiReg node. + + #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers + #define USER_ARGS_COME_LAST 1 + #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. + #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target + #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. + #define FEATURE_EH_FUNCLETS 1 + #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
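The MAX_PASS_*/MAX_ARG_REG_COUNT limits above encode the same size split that the scopeinfo.cpp change earlier relies on: up to 8 bytes fits a single register, up to 16 bytes may span two registers, and larger structs are passed by reference. A rough sketch of that classification, ignoring float fields and register exhaustion (hypothetical helper, not a JIT API):

// Illustrative only; field types and running out of argument registers are ignored here.
unsigned RegsNeededForStructArg(unsigned structSizeInBytes)
{
    if (structSizeInBytes <= 8)  // MAX_PASS_SINGLEREG_BYTES
        return 1;
    if (structSizeInBytes <= 16) // MAX_PASS_MULTIREG_BYTES / MAX_ARG_REG_COUNT
        return 2;
    return 1;                    // larger structs: one register holds a reference (the VLT_STK_BYREF case)
}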
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods + #define CSE_CONSTS 1 // Enable if we want to CSE constants + + #define REG_FP_FIRST REG_F0 + #define REG_FP_LAST REG_F31 + #define FIRST_FP_ARGREG REG_F0 + #define LAST_FP_ARGREG REG_F7 + + #define REGNUM_BITS 6 // number of bits in a REG_* within registerloongarch64.h + #define REGSIZE_BYTES 8 // number of bytes in one general purpose register + #define FP_REGSIZE_BYTES 8 // number of bytes in one FP register + #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP register that are saved/restored. + + #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call. + + #define CODE_ALIGN 4 // code alignment requirement + #define STACK_ALIGN 16 // stack alignment requirement + + #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) + #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) + #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) + #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) + + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + + #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) + #define RBM_ALLDOUBLE RBM_ALLFLOAT + + // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED + #define REG_VAR_ORDER REG_A0,REG_A1,REG_A2,REG_A3,REG_A4,REG_A5,REG_A6,REG_A7, \ + REG_T0,REG_T1,REG_T2,REG_T3,REG_T4,REG_T5,REG_T6,REG_T7,REG_T8, \ + REG_CALLEE_SAVED_ORDER + + #define REG_VAR_ORDER_FLT REG_F12,REG_F13,REG_F14,REG_F15,REG_F16,REG_F17,REG_F18,REG_F19, \ + REG_F2,REG_F3,REG_F4,REG_F5,REG_F6,REG_F7,REG_F8,REG_F9,REG_F10, \ + REG_F20,REG_F21,REG_F22,REG_F23, \ + REG_F24,REG_F25,REG_F26,REG_F27,REG_F28,REG_F29,REG_F30,REG_F31, \ + REG_F1,REG_F0 + + #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 + #define RBM_CALLEE_SAVED_ORDER RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 + + #define CNT_CALLEE_SAVED (10) //s0-s8,fp. + #define CNT_CALLEE_TRASH (17) + #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) + + #define CNT_CALLEE_SAVED_FLOAT (8) + #define CNT_CALLEE_TRASH_FLOAT (24) + + #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES) + #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES) + + #define REG_TMP_0 REG_T0 + + // Temporary registers used for the GS cookie check. + #define REG_GSCOOKIE_TMP_0 REG_T0 + #define REG_GSCOOKIE_TMP_1 REG_T1 + + // register to hold shift amount; no special register is required on LOONGARCH64. 
+ #define REG_SHIFT REG_NA + #define RBM_SHIFT RBM_ALLINT + + // This is a general scratch register that does not conflict with the argument registers + #define REG_SCRATCH REG_T0 + + // This is a float scratch register that does not conflict with the argument registers + #define REG_SCRATCH_FLT REG_F11 + + // This is a general register that can be optionally reserved for other purposes during codegen + #define REG_OPT_RSVD REG_T1 + #define RBM_OPT_RSVD RBM_T1 + + // Where is the exception object on entry to the handler block? + #define REG_EXCEPTION_OBJECT REG_A0 + #define RBM_EXCEPTION_OBJECT RBM_A0 + + #define REG_JUMP_THUNK_PARAM REG_T2 + #define RBM_JUMP_THUNK_PARAM RBM_T2 + + // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // t6: the destination address (LHS of the assignment) + // t7: the object reference (RHS of the assignment) + // On exit: + // t0: trashed + // t1: trashed + // t3: trashed + // t4: trashed + // t6: incremented by 8 + // t7: trashed + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // t8: the source address (points to object reference to write) + // t6: the destination address (object reference written here) + // On exit: + // t8: incremented by 8 + // t6: incremented by 8 + // + + #define REG_WRITE_BARRIER_DST REG_T6 + #define RBM_WRITE_BARRIER_DST RBM_T6 + + #define REG_WRITE_BARRIER_SRC REG_T7 + #define RBM_WRITE_BARRIER_SRC RBM_T7 + + #define REG_WRITE_BARRIER_DST_BYREF REG_T6 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_T6 + + #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 + + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that a0 and a1 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC + + // GenericPInvokeCalliHelper VASigCookie Parameter + #define REG_PINVOKE_COOKIE_PARAM REG_T3 + #define RBM_PINVOKE_COOKIE_PARAM RBM_T3 + + // GenericPInvokeCalliHelper unmanaged target Parameter + #define REG_PINVOKE_TARGET_PARAM REG_T2 + #define RBM_PINVOKE_TARGET_PARAM RBM_T2 + + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) + #define REG_SECRET_STUB_PARAM REG_T2 + #define RBM_SECRET_STUB_PARAM RBM_T2 + + // R2R indirect call. 
Use the same registers as VSD + #define REG_R2R_INDIRECT_PARAM REG_T8 + #define RBM_R2R_INDIRECT_PARAM RBM_T8 + + #define REG_INDIRECT_CALL_TARGET_REG REG_T6 + + // Registers used by PInvoke frame setup + #define REG_PINVOKE_FRAME REG_T0 + #define RBM_PINVOKE_FRAME RBM_T0 + #define REG_PINVOKE_TCB REG_T1 + #define RBM_PINVOKE_TCB RBM_T1 + #define REG_PINVOKE_SCRATCH REG_T1 + #define RBM_PINVOKE_SCRATCH RBM_T1 + + // The following defines are useful for iterating a regNumber + #define REG_FIRST REG_R0 + #define REG_INT_FIRST REG_R0 + #define REG_INT_LAST REG_S8 + #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1) + #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) + #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) + + // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH + + // Which register are int and long values returned in ? + #define REG_INTRET REG_A0 + #define RBM_INTRET RBM_A0 + #define REG_LNGRET REG_A0 + #define RBM_LNGRET RBM_A0 + // second return register for 16-byte structs + #define REG_INTRET_1 REG_A1 + #define RBM_INTRET_1 RBM_A1 + + #define REG_FLOATRET REG_F0 + #define RBM_FLOATRET RBM_F0 + #define RBM_DOUBLERET RBM_F0 + #define REG_FLOATRET_1 REG_F1 + #define RBM_FLOATRET_1 RBM_F1 + #define RBM_DOUBLERET_1 RBM_F1 + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_A0 | RBM_A1 | RBM_A2 | RBM_A3 | RBM_A4 | RBM_A5 | RBM_A6 | RBM_A7 | RBM_T3)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + + #define REG_FPBASE REG_FP + #define RBM_FPBASE RBM_FP + #define STR_FPBASE "fp" + #define REG_SPBASE REG_SP + #define RBM_SPBASE RBM_SP + #define STR_SPBASE "sp" + + #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address + + #define MAX_REG_ARG 8 + #define MAX_FLOAT_REG_ARG 8 + + #define REG_ARG_FIRST REG_A0 + #define REG_ARG_LAST REG_A7 + #define REG_ARG_FP_FIRST REG_F0 + #define REG_ARG_FP_LAST REG_F7 + #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots + + #define REG_ARG_0 REG_A0 + #define REG_ARG_1 REG_A1 + #define REG_ARG_2 REG_A2 + #define REG_ARG_3 REG_A3 + #define REG_ARG_4 REG_A4 + #define REG_ARG_5 REG_A5 + #define REG_ARG_6 REG_A6 + #define REG_ARG_7 REG_A7 + + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + + #define RBM_ARG_0 RBM_A0 + #define RBM_ARG_1 RBM_A1 + #define RBM_ARG_2 RBM_A2 + #define RBM_ARG_3 RBM_A3 + #define RBM_ARG_4 RBM_A4 + #define RBM_ARG_5 RBM_A5 + #define RBM_ARG_6 RBM_A6 + #define RBM_ARG_7 RBM_A7 + + #define REG_FLTARG_0 REG_F0 + #define REG_FLTARG_1 REG_F1 + #define REG_FLTARG_2 REG_F2 + #define REG_FLTARG_3 REG_F3 + #define REG_FLTARG_4 REG_F4 + #define REG_FLTARG_5 REG_F5 + #define REG_FLTARG_6 REG_F6 + #define REG_FLTARG_7 REG_F7 + + #define RBM_FLTARG_0 RBM_F0 + #define RBM_FLTARG_1 RBM_F1 + #define RBM_FLTARG_2 RBM_F2 + #define RBM_FLTARG_3 RBM_F3 + #define RBM_FLTARG_4 RBM_F4 + #define RBM_FLTARG_5 RBM_F5 + #define RBM_FLTARG_6 RBM_F6 + #define RBM_FLTARG_7 RBM_F7 + + #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) + #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) + + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + + #define B_DIST_SMALL_MAX_NEG (-131072) + #define B_DIST_SMALL_MAX_POS (+131071) + + #define OFFSET_DIST_SMALL_MAX_NEG (-2048) + #define OFFSET_DIST_SMALL_MAX_POS (+2047) + + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + +// clang-format on diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 8d5efd0051906..6ad60a064f35c 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,8 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86))) + (TargetOS::IsUnix && + (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); @@ -442,6 +443,10 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) // See unwindX86.cpp +#elif defined(TARGET_LOONGARCH64) + +// See unwindLoongarch64.cpp + #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index c578c30cb78d0..ae9a19a4b37f3 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,7 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an // assert here just to inform us if we increase the size of the prolog @@ -34,7 +34,15 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 100; #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) #define UW_MAX_CODE_WORDS_COUNT 31 #define UW_MAX_EPILOG_START_INDEX 0x3FFU -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) +const unsigned MAX_PROLOG_SIZE_BYTES = 200; +const unsigned MAX_EPILOG_SIZE_BYTES = 200; +#define UWC_END 0xE4 // "end" unwind code +#define UWC_END_C 0xE5 // "end_c" unwind code +#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) +#define UW_MAX_CODE_WORDS_COUNT 31 +#define UW_MAX_EPILOG_START_INDEX 0x3FFU +#endif // TARGET_LOONGARCH64 #define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field // of the .pdata record @@ -129,9 +137,9 @@ class UnwindCodesBase { #if defined(TARGET_ARM) return b >= 0xFD; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef DEBUG @@ -813,7 +821,7 @@ class UnwindInfo : public UnwindBase // Given the first byte of the unwind code, check that its opsize matches // the last instruction added in the emitter. void CheckOpsize(BYTE b1); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void CheckOpsize(BYTE b1) { } // nothing to do; all instructions are 4 bytes @@ -864,4 +872,4 @@ void DumpUnwindInfo(Compiler* comp, #endif // DEBUG -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp new file mode 100644 index 0000000000000..faae126aa5718 --- /dev/null +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -0,0 +1,2290 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
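As a consistency check on the LOONGARCH64 limits just added to unwind.h: every LoongArch64 instruction is 4 bytes, and the "Function Length" header field decoded later in this file is an 18-bit count of 4-byte units, which is exactly the 1 MB fragment limit. A small sketch of that arithmetic (values assumed from the defines above):

// Sketch: the 18-bit Function Length field (in 4-byte units) matches UW_MAX_FRAGMENT_SIZE_BYTES.
constexpr unsigned maxFunctionLengthUnits = 1u << 18;                 // 18-bit field
constexpr unsigned maxFragmentBytes       = maxFunctionLengthUnits * 4;
static_assert(maxFragmentBytes == (1u << 20), "one fragment covers at most 1 MB of code");
// MAX_PROLOG_SIZE_BYTES == 200 likewise corresponds to 50 four-byte instructions.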
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX UnwindInfo XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#if defined(FEATURE_CFI_SUPPORT) +short Compiler::mapRegNumToDwarfReg(regNumber reg) +{ + short dwarfReg = DWARF_REG_ILLEGAL; + + switch (reg) + { + case REG_R0: + dwarfReg = 0; + break; + case REG_RA: + dwarfReg = 1; + break; + case REG_TP: + dwarfReg = 2; + break; + case REG_SP: + dwarfReg = 3; + break; + case REG_A0: + dwarfReg = 4; + break; + case REG_A1: + dwarfReg = 5; + break; + case REG_A2: + dwarfReg = 6; + break; + case REG_A3: + dwarfReg = 7; + break; + case REG_A4: + dwarfReg = 8; + break; + case REG_A5: + dwarfReg = 9; + break; + case REG_A6: + dwarfReg = 10; + break; + case REG_A7: + dwarfReg = 11; + break; + case REG_T0: + dwarfReg = 12; + break; + case REG_T1: + dwarfReg = 13; + break; + case REG_T2: + dwarfReg = 14; + break; + case REG_T3: + dwarfReg = 15; + break; + case REG_T4: + dwarfReg = 16; + break; + case REG_T5: + dwarfReg = 17; + break; + case REG_T6: + dwarfReg = 18; + break; + case REG_T7: + dwarfReg = 19; + break; + case REG_T8: + dwarfReg = 20; + break; + case REG_X0: + dwarfReg = 21; + break; + case REG_FP: + dwarfReg = 22; + break; + case REG_S0: + dwarfReg = 23; + break; + case REG_S1: + dwarfReg = 24; + break; + case REG_S2: + dwarfReg = 25; + break; + case REG_S3: + dwarfReg = 26; + break; + case REG_S4: + dwarfReg = 27; + break; + case REG_S5: + dwarfReg = 28; + break; + case REG_S6: + dwarfReg = 29; + break; + case REG_S7: + dwarfReg = 30; + break; + case REG_S8: + dwarfReg = 31; + break; + case REG_F0: + dwarfReg = 64; + break; + case REG_F1: + dwarfReg = 65; + break; + case REG_F2: + dwarfReg = 66; + break; + case REG_F3: + dwarfReg = 67; + break; + case REG_F4: + dwarfReg = 68; + break; + case REG_F5: + dwarfReg = 69; + break; + case REG_F6: + dwarfReg = 70; + break; + case REG_F7: + dwarfReg = 71; + break; + case REG_F8: + dwarfReg = 72; + break; + case REG_F9: + dwarfReg = 73; + break; + case REG_F10: + dwarfReg = 74; + break; + case REG_F11: + dwarfReg = 75; + break; + case REG_F12: + dwarfReg = 76; + break; + case REG_F13: + dwarfReg = 77; + break; + case REG_F14: + dwarfReg = 78; + break; + case REG_F15: + dwarfReg = 79; + break; + case REG_F16: + dwarfReg = 80; + break; + case REG_F17: + dwarfReg = 81; + break; + case REG_F18: + dwarfReg = 82; + break; + case REG_F19: + dwarfReg = 83; + break; + case REG_F20: + dwarfReg = 84; + break; + case REG_F21: + dwarfReg = 85; + break; + case REG_F22: + dwarfReg = 86; + break; + case REG_F23: + dwarfReg = 87; + break; + case REG_F24: + dwarfReg = 88; + break; + case REG_F25: + dwarfReg = 89; + break; + case REG_F26: + dwarfReg = 90; + break; + case REG_F27: + dwarfReg = 91; + break; + case REG_F28: + dwarfReg = 92; + break; + case REG_F29: + dwarfReg = 93; + break; + case REG_F30: + dwarfReg = 94; + break; + case REG_F31: + dwarfReg = 95; + break; + + default: + NYI("CFI codes"); + } + + return dwarfReg; +} +#endif // FEATURE_CFI_SUPPORT + +void Compiler::unwindPush(regNumber reg) +{ + unreached(); // use one of the unwindSaveReg* functions instead. 
+} + +void Compiler::unwindAllocStack(unsigned size) +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindAllocStackCFI(size); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + assert(size % 16 == 0); + unsigned x = size / 16; + + if (x <= 0x1F) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review: should say size < 512 + + pu->AddCode((BYTE)x); + } + else if (x <= 0x7F) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + + pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x); + } + else + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + // + // For large stack size, the most significant bits + // are stored first (and next to the opCode) per the unwind spec. + + pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindSetFrameRegCFI(reg, offset); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (offset == 0) + { + assert(reg == REG_FP); + + // set_fp: 11100001 : set up fp : with : move fp, sp + pu->AddCode(0xE1); + } + else + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + + assert(reg == REG_FP); + assert((offset % 8) == 0); + + unsigned x = offset / 8; + assert(x <= 0x1FF); + + pu->AddCode(0xE2, (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSaveReg(regNumber reg, unsigned offset) +{ + unwindSaveReg(reg, (int)offset); +} + +void Compiler::unwindNop() +{ + UnwindInfo* pu = &funCurrentFunc()->uwi; + +#ifdef DEBUG + if (verbose) + { + printf("unwindNop: adding NOP\n"); + } +#endif + + INDEBUG(pu->uwiAddingNOP = true); + + // nop: 11100011: no unwind operation is required. + pu->AddCode(0xE3); + + INDEBUG(pu->uwiAddingNOP = false); +} + +void Compiler::unwindSaveReg(regNumber reg, int offset) +{ + + // st.d reg, sp, offset + + // offset for store in prolog must be positive and a multiple of 8. 
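Worked examples of the stack-allocation and frame-pointer codes emitted by the helpers above (the sizes are hypothetical; the bytes follow directly from the arithmetic in unwindAllocStack and unwindSetFrameReg):

// unwindAllocStack(256)   : x = 256/16   = 0x10   -> alloc_s : 10
// unwindAllocStack(1024)  : x = 1024/16  = 0x40   -> alloc_m : C0 40
// unwindAllocStack(65536) : x = 65536/16 = 0x1000 -> alloc_l : E0 00 10 00
// unwindSetFrameReg(REG_FP, 0)  :                 -> set_fp  : E1
// unwindSetFrameReg(REG_FP, 64) : x = 64/8 = 8    -> add_fp  : E2 00 08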
+ assert(0 <= offset && offset <= 2047); + assert((offset % 8) == 0); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + int z = offset / 8; + // assert(0 <= z && z <= 0xFF); + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (emitter::isGeneralRegister(reg)) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + + BYTE x = (BYTE)(reg - REG_RA); + assert(0 <= x && x <= 0x1E); + + pu->AddCode(0xD0, (BYTE)x, (BYTE)z); + } + else + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(REG_F24 <= reg && // first legal register: F24 + reg <= REG_F31); // last legal register: F31 + + BYTE x = (BYTE)(reg - REG_F24); + assert(0 <= x && x <= 0x7); + + pu->AddCode(0xDC, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } +} + +void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) +{ + assert(!"unused on LOONGARCH64 yet"); +} + +void Compiler::unwindReturn(regNumber reg) +{ + // Nothing to do; we will always have at least one trailing "end" opcode in our padding. +} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Debug helpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes + +unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) +{ + static BYTE s_UnwindSize[256] = { + // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + }; + + unsigned size = s_UnwindSize[b1]; + assert(1 <= size && size <= 4); + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Support Classes XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindCodesBase +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG + +// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes. +unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog) +{ + BYTE* pCodesStart = GetCodes(); + BYTE* pCodes = pCodesStart; + unsigned size = 0; + for (;;) + { + BYTE b1 = *pCodes; + if (IsEndCode(b1)) + { + break; // We hit an "end" code; we're done + } + size += 4; // All codes represent 4 byte instructions. + pCodes += GetUnwindSizeFromUnwindHeader(b1); + assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed + } + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debug dumpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// start is 0-based index from LSB, length is number of bits +DWORD ExtractBits(DWORD dw, DWORD start, DWORD length) +{ + return (dw >> start) & ((1 << length) - 1); +} + +// Dump the unwind data. +// Arguments: +// isHotCode: true if this unwind data is for the hot section +// startOffset: byte offset of the code start that this unwind data represents +// endOffset: byte offset of the code end that this unwind data represents +// pHeader: pointer to the unwind data blob +// unwindBlockSize: size in bytes of the unwind data blob + +void DumpUnwindInfo(Compiler* comp, + bool isHotCode, + UNATIVE_OFFSET startOffset, + UNATIVE_OFFSET endOffset, + const BYTE* const pHeader, + ULONG unwindBlockSize) +{ + printf("Unwind Info%s:\n", isHotCode ? "" : " COLD"); + + // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end + // to provide padding, and round down to get a multiple of 4 bytes in size. + DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader; + DWORD dw; + + dw = *pdw++; + + DWORD codeWords = ExtractBits(dw, 27, 5); + DWORD epilogCount = ExtractBits(dw, 22, 5); + DWORD EBit = ExtractBits(dw, 21, 1); + DWORD XBit = ExtractBits(dw, 20, 1); + DWORD Vers = ExtractBits(dw, 18, 2); + DWORD functionLength = ExtractBits(dw, 0, 18); + + printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset)); + printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset)); + printf(" Code Words : %u\n", codeWords); + printf(" Epilog Count : %u\n", epilogCount); + printf(" E bit : %u\n", EBit); + printf(" X bit : %u\n", XBit); + printf(" Vers : %u\n", Vers); + printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength, + functionLength * 4, functionLength * 4); + + assert(functionLength * 4 == endOffset - startOffset); + + if (codeWords == 0 && epilogCount == 0) + { + // We have an extension word specifying a larger number of Code Words or Epilog Counts + // than can be specified in the header word. 
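To make the header-word decoding above concrete, here is a hypothetical packing helper and one worked value (E, X, and Vers are left zero; PackUnwindHeader is not part of this file):

// Sketch: the inverse of the ExtractBits calls above for the main header word.
DWORD PackUnwindHeader(DWORD functionLengthIn4ByteUnits, DWORD epilogCount, DWORD codeWords)
{
    return (codeWords << 27) | (epilogCount << 22) | (functionLengthIn4ByteUnits & 0x3FFFF);
}

// e.g. a 1024-byte function (256 four-byte units) with 1 epilog scope and 2 code words:
//   PackUnwindHeader(256, 1, 2) == 0x10400100
//   ExtractBits(0x10400100, 27, 5) == 2    (Code Words)
//   ExtractBits(0x10400100, 22, 5) == 1    (Epilog Count)
//   ExtractBits(0x10400100,  0, 18) == 256 (Function Length)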
+ + dw = *pdw++; + + codeWords = ExtractBits(dw, 16, 8); + epilogCount = ExtractBits(dw, 0, 16); + assert((dw & 0xF0000000) == 0); // reserved field should be zero + + printf(" ---- Extension word ----\n"); + printf(" Extended Code Words : %u\n", codeWords); + printf(" Extended Epilog Count : %u\n", epilogCount); + } + + bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false + + if (EBit == 0) + { + // We have an array of epilog scopes + + printf(" ---- Epilog scopes ----\n"); + if (epilogCount == 0) + { + printf(" No epilogs\n"); + } + else + { + for (DWORD scope = 0; scope < epilogCount; scope++) + { + dw = *pdw++; + + DWORD epilogStartOffset = ExtractBits(dw, 0, 18); + DWORD res = ExtractBits(dw, 18, 4); + DWORD epilogStartIndex = ExtractBits(dw, 22, 10); + + // Note that epilogStartOffset for a funclet is the offset from the beginning + // of the current funclet, not the offset from the beginning of the main function. + // To help find it when looking through JitDump output, also show the offset from + // the beginning of the main function. + DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset; + + assert(res == 0); + + printf(" ---- Scope %d\n", scope); + printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main " + "function begin = %u (0x%06x)\n", + comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset), + comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin)); + printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex); + + epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes + } + } + } + else + { + printf(" --- One epilog, unwind codes at %u\n", epilogCount); + assert(epilogCount < ArrLen(epilogStartAt)); + epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset + } + + // Dump the unwind codes + + printf(" ---- Unwind codes ----\n"); + + DWORD countOfUnwindCodes = codeWords * 4; + PBYTE pUnwindCode = (PBYTE)pdw; + BYTE b1, b2, b3, b4; + DWORD x, z; + for (DWORD i = 0; i < countOfUnwindCodes; i++) + { + // Does this byte start an epilog sequence? If so, note that fact. 
+ if (epilogStartAt[i]) + { + printf(" ---- Epilog start at index %u ----\n", i); + } + + b1 = *pUnwindCode++; + + if ((b1 & 0xE0) == 0) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review:should say size < 512 + x = b1 & 0x1F; + printf(" %02X alloc_s #%u (0x%02X); addi.d sp, sp, -%u (0x%03X)\n", b1, x, x, x * 16, x * 16); + } +#if 0 + else if ((b1 & 0xE0) == 0x20) + { + // save_s0s1_x: 001zzzzz: save pair at [sp-#Z*8]!, pre-indexed offset >= -248 + z = b1 & 0x1F; + printf(" %02X save_s0s1_x #%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, z, z, + getRegName(REG_S0), getRegName(REG_S1), z * 8); + } + else if ((b1 & 0xF0) == 0x40) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra #%u (0x%03X); Two sd %s, %s, [sp, #%u]\n", b1, b2, z, z, getRegName(REG_FP), + getRegName(REG_RA), z * 8); + } + else if ((b1 & 0xF0) == 0x80) + { + // save_fpra_x: 1000zzzz | zzzzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra_x #%u (0x%03X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, z, z, + getRegName(REG_FP), getRegName(REG_RA), (z + 1) * 8); + } +#endif + else if ((b1 & 0xF8) == 0xC0) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2; + + printf(" %02X %02X alloc_m #%u (0x%03X); addi.d sp, sp, -%u (0x%04X)\n", b1, b2, x, x, x * 16, + x * 16); + } + else if (b1 == 0xD0) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)b2; + z = (DWORD)b3; + + printf(" %02X %02X %02X save_reg X#%u Z#%u (0x%02X); st.d %s, sp, %u\n", b1, b2, b3, x, z, z, + getRegName(REG_RA + x), z * 8); + } +#if 0 + else if (b1 == 0xC8) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), z * 8); + } + else if (b1 == 0xCC) + { + // save_regp_x: 11001100 | 0xxxzzzz | zzzzzzzz: save pair s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i+= 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp_x X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), (z + 1) * 8); + } + else if ((b1 & 0xFE) == 0xD4) + { + // save_reg_x: 1101010x | xxxzzzzz: save reg s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); sd %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_S0 + x), (z + 
1) * 8); + } + else if (b1 == 0xD6) + { + // save_rapair: 11010110 | 0xxxzzzz | zzzzzzzz: save pair at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_lrpair X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_RA), z * 8); + } + else if (b1 == 0xD8) + { + // save_fregp: 11011000 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X) at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), z * 8); + } + else if (b1 == 0xDA) + { + // save_fregp_x: 11011010 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp_x X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xDC) + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_freg X#%u Z#%u (0x%02X); fst.d %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x), z * 8); + } +#if 0 + else if (b1 == 0xDE) + { + // save_freg_x: 11011110 | xxxzzzzz : save reg f(24 + #X) at [sp - (#Z + 1) * 8]!, pre - indexed offset >= + // -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); sdc1 %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_F24 + x, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xE0) + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + assert(i + 3 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + b4 = *pUnwindCode++; + i += 3; + + x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4; + + printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); addi.d sp, sp, -%u (%06X)\n", b1, b2, b3, b4, x, x, + x * 16, x * 16); + } + else if (b1 == 0xE1) + { + // set_fp: 11100001 : set up $29 : with : move fp, sp + + printf(" %02X set_fp; move %s, sp\n", b1, getRegName(REG_FP)); + } + else if (b1 == 0xE2) + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + assert(i + 2 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, + getRegName(REG_FP), x * 8); + } + else if (b1 == 0xE3) + { + // nop: 11100011: no unwind operation is required. 
+ + printf(" %02X nop\n", b1); + } + else if (b1 == 0xE4) + { + // end: 11100100 : end of unwind code + + printf(" %02X end\n", b1); + } + else if (b1 == 0xE5) + { + // end_c: 11100101 : end of unwind code in current chained scope. + + printf(" %02X end_c\n", b1); + } + else if (b1 == 0xE6) + { + // save_next: 11100110 : save next non - volatile Int or FP register pair. + + printf(" %02X save_next\n", b1); + } + else + { + printf("===========[loongarch64] Unknown / reserved unwind code: %02X\n", b1); + // Unknown / reserved unwind code + assert(!"Internal error decoding unwind codes"); + } + } + + pdw += codeWords; + assert((PBYTE)pdw == pUnwindCode); + assert((PBYTE)pdw == pHeader + unwindBlockSize); + + assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA + + printf("\n"); +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind APIs XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +void Compiler::unwindBegProlog() +{ + assert(compGeneratingProlog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindBegPrologCFI(); + return; + } +#endif // FEATURE_CFI_SUPPORT + + FuncInfoDsc* func = funCurrentFunc(); + + // There is only one prolog for a function/funclet, and it comes first. So now is + // a good time to initialize all the unwind data structures. + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, true, &startLoc, &endLoc); + + func->uwi.InitUnwindInfo(this, startLoc, endLoc); + func->uwi.CaptureLocation(); + + func->uwiCold = NULL; // No cold data yet +} + +void Compiler::unwindEndProlog() +{ + assert(compGeneratingProlog); +} + +void Compiler::unwindBegEpilog() +{ + assert(compGeneratingEpilog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + funCurrentFunc()->uwi.AddEpilog(); +} + +void Compiler::unwindEndEpilog() +{ + assert(compGeneratingEpilog); +} + +// The instructions between the last captured "current state" and the current instruction +// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes +// for them. +void Compiler::unwindPadding() +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); +} + +// Ask the VM to reserve space for the unwind information for the function and +// all its funclets. +void Compiler::unwindReserve() +{ + assert(!compGeneratingProlog); + assert(!compGeneratingEpilog); + + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindReserveFunc(funGetFunc(funcIdx)); + } +} + +void Compiler::unwindReserveFunc(FuncInfoDsc* func) +{ + BOOL isFunclet = (func->funKind == FUNC_ROOT) ? 
FALSE : TRUE; + bool funcHasColdSection = false; + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + DWORD unwindCodeBytes = 0; + if (fgFirstColdBlock != nullptr) + { + eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); + } + unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE)); + eeReserveUnwindInfo(isFunclet, false /*isColdCode*/, unwindCodeBytes); + + return; + } +#endif // FEATURE_CFI_SUPPORT + + // If there is cold code, split the unwind data between the hot section and the + // cold section. This needs to be done before we split into fragments, as each + // of the hot and cold sections can have multiple fragments. + + if (fgFirstColdBlock != NULL) + { + assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, false, &startLoc, &endLoc); + + func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); + func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); + func->uwiCold->HotColdSplitCodes(&func->uwi); + + funcHasColdSection = true; + } + + // First we need to split the function or funclet into fragments that are no larger + // than 512K, so the fragment size will fit in the unwind data "Function Length" field. + // The LOONGARCH Exception Data specification "Function Fragments" section describes this. + func->uwi.Split(); + + func->uwi.Reserve(isFunclet, true); + + // After the hot section, split and reserve the cold section + + if (funcHasColdSection) + { + assert(func->uwiCold != NULL); + + func->uwiCold->Split(); + func->uwiCold->Reserve(isFunclet, false); + } +} + +// unwindEmit: Report all the unwind information to the VM. +// Arguments: +// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code +// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code. + +void Compiler::unwindEmit(void* pHotCode, void* pColdCode) +{ + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode); + } +} + +void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode) +{ + // Verify that the JIT enum is in sync with the JIT-EE interface enum + static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT); + static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); + static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindEmitFuncCFI(func, pHotCode, pColdCode); + return; + } +#endif // FEATURE_CFI_SUPPORT + + func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); + + if (func->uwiCold != NULL) + { + func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindPrologCodes +// +/////////////////////////////////////////////////////////////////////////////// + +// We're going to use the prolog codes memory to store the final unwind data. +// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then +// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes. +// Set the header pointer for future use, adding the header bytes (this pointer +// is updated when a header byte is added), and remember the index that points +// to the beginning of the header. 
+ +void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes) +{ +#ifdef DEBUG + // We're done adding codes. Check that we didn't accidentally create a bigger prolog. + unsigned codeSize = GetCodeSizeFromUnwindCodes(true); + assert(codeSize <= MAX_PROLOG_SIZE_BYTES); +#endif // DEBUG + + int prologBytes = Size(); + + EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment + + upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header + + assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3); + + upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing + assert(upcHeaderSlot >= -1); + + if (epilogBytes > 0) + { + // The prolog codes that are already at the end of the array need to get moved to the middle, + // with space for the non-matching epilog codes to follow. + + memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), + &upcMem[upcCodeSlot], prologBytes); + + // Note that the three UWC_END padding bytes still exist at the end of the array. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes. + memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes); +#endif // DEBUG + + upcEpilogSlot = + upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill + + // Update upcCodeSlot to point at the new beginning of the prolog codes + upcCodeSlot = upcUnwindBlockSlot + headerBytes; + } +} + +// Add a header word. Header words are added starting at the beginning, in order: first to last. +// This is in contrast to the prolog unwind codes, which are added in reverse order. +void UnwindPrologCodes::AddHeaderWord(DWORD d) +{ + assert(-1 <= upcHeaderSlot); + assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there! + + // Store it byte-by-byte in little-endian format. We've already ensured there is enough space + // in SetFinalSize(). + upcMem[++upcHeaderSlot] = (BYTE)d; + upcMem[++upcHeaderSlot] = (BYTE)(d >> 8); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 16); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 24); +} + +// AppendEpilog: copy the epilog bytes to the next epilog bytes slot +void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi) +{ + assert(upcEpilogSlot != -1); + + int epiSize = pEpi->Size(); + memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(), + epiSize); // -3 to avoid writing to the alignment padding + assert(pEpi->GetStartIndex() == + upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it. + + upcEpilogSlot += epiSize; + assert(upcEpilogSlot <= upcMemSize - 3); +} + +// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes +void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize) +{ + assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data! + + *ppUnwindBlock = &upcMem[upcUnwindBlockSlot]; + + // We put 4 'end' codes at the end for padding, so we can ensure we have an + // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end' + // codes (leave one), and then align the size up to a multiple of 4. 
+ *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD)); +} + +int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) +{ + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = 0; // Size() - pEpi->Size(); + + BYTE* pProlog = GetCodes(); + BYTE* pEpilog = pEpi->GetCodes(); + + // First check set_fp. + if (0 < pEpi->Size()) + { + if (*pProlog == 0xE1) + { + pProlog++; + if (*pEpilog == 0xE1) + { + pEpilog++; + } + else + { + matchIndex = 1; + } + } + else if (*pProlog == 0xE2) + { + pProlog += 3; + if (*pEpilog == 0xE1) + { + pEpilog += 3; + } + else + { + matchIndex = 3; + } + } + } + + if (0 == memcmp(pProlog, pEpilog, pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +// Copy the prolog codes from another prolog. The only time this is legal is +// if we are at the initial state and no prolog codes have been added. +// This is used to create the 'phantom' prolog for non-first fragments. + +void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) +{ + assert(uwiComp == pCopyFrom->uwiComp); + assert(upcMem == upcMemLocal); + assert(upcMemSize == UPC_LOCAL_COUNT); + assert(upcHeaderSlot == -1); + assert(upcEpilogSlot == -1); + + // Copy the codes + EnsureSize(pCopyFrom->upcMemSize); + assert(upcMemSize == pCopyFrom->upcMemSize); + memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize); + + // Copy the other data + upcCodeSlot = pCopyFrom->upcCodeSlot; + upcHeaderSlot = pCopyFrom->upcHeaderSlot; + upcEpilogSlot = pCopyFrom->upcEpilogSlot; + upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot; +} + +void UnwindPrologCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > upcMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem, + upcMemSize); // copy the existing data to the end +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(upcMem, 0xFF, upcMemSize); +#endif // DEBUG + upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + upcCodeSlot += newSize - upcMemSize; + upcMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindPrologCodes::Dump(int indent) +{ + printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0])); + printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem)); + printf("%*s upcMemSize: %d\n", indent, "", upcMemSize); + printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot); + printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot); + printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot); + printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot); + + if (upcMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < upcMemSize; i++) + { + printf(" %02x", upcMem[i]); + if (i == upcCodeSlot) + printf(" <-C"); + else if (i == upcHeaderSlot) + printf(" <-H"); + else if (i == upcEpilogSlot) + printf(" <-E"); + else if (i == upcUnwindBlockSlot) + printf(" <-U"); + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogCodes +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindEpilogCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > uecMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize); +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(uecMem, 0xFF, uecMemSize); +#endif // DEBUG + uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + // uecCodeSlot stays the same + uecMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindEpilogCodes::Dump(int indent) +{ + printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0])); + printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem)); + printf("%*s uecMemSize: %d\n", indent, "", uecMemSize); + printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot); + printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized)); + + if (uecMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < uecMemSize; i++) + { + printf(" %02x", uecMem[i]); + if (i == uecCodeSlot) + printf(" <-C"); // Indicate the current pointer + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogInfo +// +/////////////////////////////////////////////////////////////////////////////// + +// Do the current unwind codes match those of the argument epilog? +// If they don't match, return -1. If they do, return the offset into +// our codes at which the argument codes match. Note that this means that +// the argument codes can match a subset of our codes. The subset needs to be at +// the end, for the "end" code to match. +// +// Note that if we wanted to handle 0xFD and 0xFE codes, by converting +// an existing 0xFF code to one of those, we might do that here. 
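Both EnsureSize variants above grow their buffer to the next power of two; the only difference is where the surviving bytes land, because prolog codes are filled from the back of the buffer toward the front, while epilog codes are filled front-to-back. A minimal standalone sketch of that growth step (illustrative names, not the JIT's allocator):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Grow a buffer of 'oldSize' bytes to the next power of two that holds 'requiredSize' bytes.
// Prolog-style buffers copy the old contents to the *tail* of the new buffer (codes are
// built back-to-front); epilog-style buffers copy them to the head.
static unsigned char* Grow(const unsigned char* oldBuf, int oldSize, int requiredSize, bool backToFront, int* newSize)
{
    int size;
    for (size = oldSize << 1; size < requiredSize; size <<= 1)
    {
        // same doubling loop shape as EnsureSize
    }

    unsigned char* newBuf = (unsigned char*)calloc((size_t)size, 1);
    memcpy(backToFront ? (newBuf + size - oldSize) : newBuf, oldBuf, (size_t)oldSize);

    *newSize = size;
    return newBuf;
}

int main()
{
    const unsigned char codes[4] = {0xAA, 0xBB, 0xCC, 0xDD};
    int                 size;

    unsigned char* prologStyle = Grow(codes, 4, 10, /* backToFront */ true, &size);
    printf("prolog-style growth to %d bytes; old data now ends at offset %d\n", size, size - 1);

    unsigned char* epilogStyle = Grow(codes, 4, 10, /* backToFront */ false, &size);
    printf("epilog-style growth to %d bytes; old data still starts at offset 0 (first byte 0x%02x)\n", size,
           (unsigned)epilogStyle[0]);

    free(prologStyle);
    free(epilogStyle);
    return 0;
}
```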
+ +int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi) +{ + if (Matches()) + { + // We are already matched to someone else, and won't provide codes to the final layout + return -1; + } + + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = Size() - pEpi->Size(); + + if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +void UnwindEpilogInfo::CaptureEmitLocation() +{ + noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog + epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation(); + epiEmitLocation->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindEpilogInfo::FinalizeOffset() +{ + epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->GetEmitter()); +} + +#ifdef DEBUG +void UnwindEpilogInfo::Dump(int indent) +{ + printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext)); + printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation)); + printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset); + printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches)); + printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex); + + epiCodes.Dump(indent + 2); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindFragmentInfo +// +/////////////////////////////////////////////////////////////////////////////// + +UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog) + : UnwindBase(comp) + , ufiNext(NULL) + , ufiEmitLoc(emitLoc) + , ufiHasPhantomProlog(hasPhantomProlog) + , ufiPrologCodes(comp) + , ufiEpilogFirst(comp) + , ufiEpilogList(NULL) + , ufiEpilogLast(NULL) + , ufiCurCodes(&ufiPrologCodes) + , ufiSize(0) + , ufiStartOffset(UFI_ILLEGAL_OFFSET) +{ +#ifdef DEBUG + ufiNum = 1; + ufiInProlog = true; + ufiInitialized = UFI_INITIALIZED_PATTERN; +#endif // DEBUG +} + +void UnwindFragmentInfo::FinalizeOffset() +{ + if (ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + ufiStartOffset = 0; + } + else + { + ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); + } +} + +void UnwindFragmentInfo::AddEpilog() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (ufiInProlog) + { + assert(ufiEpilogList == NULL); + ufiInProlog = false; + } + else + { + assert(ufiEpilogList != NULL); + } +#endif // DEBUG + + // Either allocate a new epilog object, or, for the first one, use the + // preallocated one that is a member of the UnwindFragmentInfo class. + + UnwindEpilogInfo* newepi; + + if (ufiEpilogList == NULL) + { + // Use the epilog that's in the class already. Be sure to initialize it! + newepi = ufiEpilogList = &ufiEpilogFirst; + } + else + { + newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp); + } + + // Put the new epilog at the end of the epilog list + + if (ufiEpilogLast != NULL) + { + ufiEpilogLast->epiNext = newepi; + } + + ufiEpilogLast = newepi; + + // What is the starting code offset of the epilog? Store an emitter location + // so we can ask the emitter later, after codegen. 
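The epilog-to-epilog comparison implemented in UnwindEpilogInfo::Match above is a plain suffix match: the candidate's codes must equal the tail of this epilog's codes, so that the trailing "end" code lines up. A standalone sketch of that check (illustrative byte values, no JIT types):

```cpp
#include <cstdio>
#include <cstring>

// Return the offset at which 'needle' matches the *tail* of 'haystack', or -1 if it does not.
// This mirrors the memcmp-based check in UnwindEpilogInfo::Match.
static int SuffixMatch(const unsigned char* haystack, int haystackLen, const unsigned char* needle, int needleLen)
{
    if (haystackLen < needleLen)
    {
        return -1;
    }

    int matchIndex = haystackLen - needleLen;
    if (memcmp(haystack + matchIndex, needle, (size_t)needleLen) == 0)
    {
        return matchIndex;
    }
    return -1;
}

int main()
{
    // Made-up code sequences; 0xE4 stands in for the terminating "end" code here.
    const unsigned char longerEpilog[]  = {0x42, 0x83, 0x01, 0xE4};
    const unsigned char shorterEpilog[] = {0x83, 0x01, 0xE4};
    const unsigned char otherEpilog[]   = {0x90, 0x01, 0xE4};

    printf("shorter vs longer: match index %d\n", SuffixMatch(longerEpilog, 4, shorterEpilog, 3)); // prints 1
    printf("other   vs longer: match index %d\n", SuffixMatch(longerEpilog, 4, otherEpilog, 3));   // prints -1
    return 0;
}
```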
+ + newepi->CaptureEmitLocation(); + + // Put subsequent unwind codes in this new epilog + + ufiCurCodes = &newepi->epiCodes; +} + +// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will +// become 'phantom' prolog codes in this fragment. Note that this fragment should +// not have any prolog codes currently; it is at the initial state. + +void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) +{ + ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes); + ufiPrologCodes.AddCode(UWC_END_C); +} + +// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent +// epilogs that start at or after the location represented by 'emitLoc' are removed +// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have +// any epilog codes currently; it is at the initial state. + +void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) +{ + UnwindEpilogInfo* pEpiPrev; + UnwindEpilogInfo* pEpi; + + UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->GetEmitter()); + + for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it + if (pEpi->GetStartOffset() >= splitOffset) + { + // This epilog and all following epilogs, which must be in order of increasing offsets, + // get moved to this fragment. + + // Splice in the epilogs to this fragment. Set the head of the epilog + // list to this epilog. + ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst' + ufiEpilogLast = pSplitFrom->ufiEpilogLast; + + // Splice out the tail of the list from the 'pSplitFrom' epilog list + pSplitFrom->ufiEpilogLast = pEpiPrev; + if (pSplitFrom->ufiEpilogLast == NULL) + { + pSplitFrom->ufiEpilogList = NULL; + } + else + { + pSplitFrom->ufiEpilogLast->epiNext = NULL; + } + + // No more codes should be added once we start splitting + pSplitFrom->ufiCurCodes = NULL; + ufiCurCodes = NULL; + + break; + } + } +} + +// Is this epilog at the end of an unwind fragment? Ask the emitter. +// Note that we need to know this before all code offsets are finalized, +// so we can determine whether we can omit an epilog scope word for a +// single matching epilog. + +bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi) +{ + return uwiComp->GetEmitter()->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc); +} + +// Merge the unwind codes as much as possible. +// This function is called before all offsets are final. +// Also, compute the size of the final unwind block. Store this +// and some other data for later, when we actually emit the +// unwind block. + +void UnwindFragmentInfo::MergeCodes() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + + unsigned epilogCount = 0; + unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the + // prolog codes + unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes + UnwindEpilogInfo* pEpi; + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++epilogCount; + + pEpi->FinalizeCodes(); + + // Does this epilog match the prolog? + // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal + // prolog and epilog codes. 
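SplitEpilogCodes above is essentially a splice of a singly linked list that is sorted by code offset: everything from the first epilog at or after the split offset moves to the new fragment. A self-contained sketch of that splice (illustrative types, not the JIT's):

```cpp
#include <cstdio>

struct Epilog
{
    unsigned offset;
    Epilog*  next;
};

// Move every node with 'offset >= splitOffset' from the list headed by '*srcHead' to a new
// list, returning the new list's head. Assumes the list is sorted by increasing offset.
static Epilog* SpliceAtOffset(Epilog** srcHead, unsigned splitOffset)
{
    Epilog* prev = nullptr;
    for (Epilog* cur = *srcHead; cur != nullptr; prev = cur, cur = cur->next)
    {
        if (cur->offset >= splitOffset)
        {
            if (prev == nullptr)
            {
                *srcHead = nullptr; // the whole list moves
            }
            else
            {
                prev->next = nullptr; // cut the source list just before 'cur'
            }
            return cur;
        }
    }
    return nullptr; // nothing at or after the split point
}

int main()
{
    Epilog e3 = {0x300, nullptr};
    Epilog e2 = {0x200, &e3};
    Epilog e1 = {0x100, &e2};

    Epilog* hotList  = &e1;
    Epilog* coldList = SpliceAtOffset(&hotList, 0x200);

    printf("hot list tail offset:  0x%x\n", hotList->offset);  // 0x100
    printf("cold list head offset: 0x%x\n", coldList->offset); // 0x200
    return 0;
}
```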
+ + int matchIndex; + + matchIndex = ufiPrologCodes.Match(pEpi); + if (matchIndex != -1) + { + pEpi->SetMatches(); + pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index + } + else + { + // The epilog codes don't match the prolog codes. Do they match any of the epilogs + // we've seen so far? + + bool matched = false; + for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext) + { + matchIndex = pEpi2->Match(pEpi); + if (matchIndex != -1) + { + // Use the same epilog index as the one we matched, as it has already been set. + pEpi->SetMatches(); + pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's + // codes, in which case matchIndex > 0 + matched = true; + break; + } + } + + if (!matched) + { + pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location + epilogCodeBytes += pEpi->Size(); + epilogIndex += pEpi->Size(); + } + } + } + + DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes; + codeBytes = AlignUp(codeBytes, sizeof(DWORD)); + + DWORD codeWords = + codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block + + // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"? + + bool needExtendedCodeWordsEpilogCount = + (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT); + + // How many epilog scope words do we need? + + bool setEBit = false; // do we need to set the E bit? + unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs! + + if (epilogCount == 1) + { + assert(ufiEpilogList != NULL); + assert(ufiEpilogList->epiNext == NULL); + + if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog + !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList)) + { + epilogScopes = 0; // Don't need any epilog scope words + setEBit = true; + } + } + + DWORD headerBytes = (1 // Always need first header DWORD + + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code + // Words or Extended Epilog Count? + + epilogScopes // One DWORD per epilog scope, for EBit = 0 + ) * + sizeof(DWORD); // convert it to bytes + + DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, + // including end padding if necessary + + // Construct the final unwind information. + + // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs + // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match + // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the + // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog + // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we + // have that much memory, and then copy the prolog codes to the right place, appending the non-matching + // epilog codes and prepending the header. 
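To make the size arithmetic above concrete: suppose the prolog needs 10 code bytes, one epilog matches the prolog, and one 6-byte epilog does not, giving 2 epilog scopes. A quick sketch using the same formulas as MergeCodes (the UW_MAX_* limits here are stand-in values inferred from the 5-bit header fields; the real ones come from the target's unwind headers):

```cpp
#include <cstdio>

// Stand-in limits: the first header DWORD has 5 bits each for "Epilog Count" and "Code Words"
// (see the << 22 and << 27 shifts in Finalize), so 31 is the largest value either can hold.
static const unsigned UW_MAX_CODE_WORDS_COUNT = 31;
static const unsigned UW_MAX_EPILOG_COUNT     = 31;

static unsigned AlignUpTo(unsigned size, unsigned align)
{
    return (size + align - 1) & ~(align - 1);
}

int main()
{
    unsigned prologBytes            = 10; // illustrative
    unsigned nonMatchingEpilogBytes = 6;  // one 6-byte epilog that didn't match anything
    unsigned epilogScopes           = 2;  // two epilogs -> two epilog scope words (E bit not set)

    unsigned codeBytes = AlignUpTo(prologBytes + nonMatchingEpilogBytes, 4); // 16
    unsigned codeWords = codeBytes / 4;                                      // 4

    bool needExtended = (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogScopes > UW_MAX_EPILOG_COUNT); // false

    unsigned headerBytes = (1 + (needExtended ? 1 : 0) + epilogScopes) * 4; // 12
    unsigned finalSize   = headerBytes + codeBytes;                         // 28

    printf("codeBytes=%u codeWords=%u headerBytes=%u finalSize=%u\n", codeBytes, codeWords, headerBytes, finalSize);
    return 0;
}
```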
+ + ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes); + + if (epilogCodeBytes != 0) + { + // We need to copy the epilog code bytes to their final memory location + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + if (!pEpi->Matches()) + { + ufiPrologCodes.AppendEpilog(pEpi); + } + } + } + + // Save some data for later + ufiSize = finalSize; + ufiSetEBit = setEBit; + ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount; + ufiCodeWords = codeWords; + ufiEpilogScopes = epilogScopes; +} + +// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header. + +void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (0 && uwiComp->verbose) + { + printf("*************** Before fragment #%d finalize\n", ufiNum); + Dump(); + } +#endif + + // Compute the header + + noway_assert((functionLength & 3) == 0); + DWORD headerFunctionLength = functionLength / 4; + + DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. + DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. + DWORD headerEBit; + DWORD headerEpilogCount; // This depends on how we set headerEBit. + DWORD headerCodeWords; + DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. + DWORD headerExtendedCodeWords = 0; + + if (ufiSetEBit) + { + headerEBit = 1; + headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes! + headerCodeWords = ufiCodeWords; + } + else + { + headerEBit = 0; + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + headerEpilogCount = 0; + headerCodeWords = 0; + headerExtendedEpilogCount = ufiEpilogScopes; + headerExtendedCodeWords = ufiCodeWords; + } + else + { + headerEpilogCount = ufiEpilogScopes; + headerCodeWords = ufiCodeWords; + } + } + + // Start writing the header + + noway_assert(headerFunctionLength <= + 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error + + if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) | + (headerEpilogCount << 22) | (headerCodeWords << 27); + + ufiPrologCodes.AddHeaderWord(header); + + // Construct the second header word, if needed + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + noway_assert(headerEBit == 0); + noway_assert(headerEpilogCount == 0); + noway_assert(headerCodeWords == 0); + noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT)); + + if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16); + + ufiPrologCodes.AddHeaderWord(header2); + } + + // Construct the epilog scope words, if needed + + if (!ufiSetEBit) + { + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + // The epilog must strictly follow the prolog. The prolog is in the first fragment of + // the hot section. If this epilog is at the start of a fragment, it can't be the + // first fragment in the hot section. 
We actually don't know if we're processing + // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus, + // we just assert that the epilog starts within the fragment. + assert(pEpi->GetStartOffset() >= GetStartOffset()); + + // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment, + // NOT the offset from the beginning of the main function. + DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset(); + + noway_assert((headerEpilogStartOffset & 3) == 0); + headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits + // of the actual offset is always zero) + + DWORD headerEpilogStartIndex = pEpi->GetStartIndex(); + + if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) || + (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22); + + ufiPrologCodes.AddHeaderWord(epilogScopeWord); + } + } + + // The unwind code words are already here, following the header, so we're done! +} + +void UnwindFragmentInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH + + MergeCodes(); + + BOOL isColdCode = isHotCode ? FALSE : TRUE; + + ULONG unwindSize = Size(); + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("reserveUnwindInfo: fragment #%d:\n", ufiNum); + } +#endif + + uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize); +} + +// Allocate the unwind info for a fragment with the VM. +// Arguments: +// funKind: funclet kind +// pHotCode: hot section code buffer +// pColdCode: cold section code buffer +// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a +// function/funclet. +// isHotCode: are we allocating the unwind info for the hot code section? + +void UnwindFragmentInfo::Allocate( + CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode) +{ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + // We don't support hot/cold splitting with EH, so if there is cold code, this + // better not be a funclet! + // TODO-CQ: support funclets in cold code + + noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); + + // Compute the final size, and start and end offsets of the fragment + + startOffset = GetStartOffset(); + + if (ufiNext == NULL) + { + // This is the last fragment, so the fragment extends to the end of the function/fragment. + assert(funcEndOffset != 0); + endOffset = funcEndOffset; + } + else + { + // The fragment length is all the code between the beginning of this fragment + // and the beginning of the next fragment. Note that all fragments have had their + // offsets computed before any fragment is allocated. 
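The bit layout assembled by Finalize above can be captured in a short packing sketch; the shift amounts are the ones used in the code, and all field values below are illustrative.

```cpp
#include <cstdio>

int main()
{
    // First header DWORD: Function Length (in 4-byte units), Vers, X, E, Epilog Count, Code Words.
    unsigned functionLength = 0x800; // bytes; must be a multiple of 4
    unsigned vers           = 0;
    unsigned xBit           = 0;
    unsigned eBit           = 0;
    unsigned epilogCount    = 2; // number of epilog scopes when E == 0
    unsigned codeWords      = 4;

    unsigned header = (functionLength / 4) | (vers << 18) | (xBit << 20) | (eBit << 21) | (epilogCount << 22) |
                      (codeWords << 27);

    // Epilog scope word (only present when E == 0): epilog start offset in 4-byte units,
    // plus the index of the epilog's first unwind code.
    unsigned epilogStartOffset = 0x7f0; // byte offset of the epilog within the fragment
    unsigned epilogStartIndex  = 0;     // epilog matched the prolog, so it starts at code index 0

    unsigned epilogScopeWord = (epilogStartOffset / 4) | (epilogStartIndex << 22);

    printf("header word:       0x%08x\n", header);
    printf("epilog scope word: 0x%08x\n", epilogScopeWord);
    return 0;
}
```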
+ endOffset = ufiNext->GetStartOffset(); + } + + assert(endOffset > startOffset); + codeSize = endOffset - startOffset; + + // Finalize the fragment unwind block to hand to the VM + + Finalize(codeSize); + + // Get the final unwind information and hand it to the VM + + ULONG unwindBlockSize; + BYTE* pUnwindBlock; + + GetFinalInfo(&pUnwindBlock, &unwindBlockSize); + +#ifdef DEBUG + if (uwiComp->opts.dspUnwind) + { + DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize); + } +#endif // DEBUG + + // Adjust for cold or hot code: + // 1. The VM doesn't want the cold code pointer unless this is cold code. + // 2. The startOffset and endOffset need to be from the base of the hot section for hot code + // and from the base of the cold section for cold code + + if (isHotCode) + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + pColdCode = NULL; + } + else + { + assert(startOffset >= uwiComp->info.compTotalHotCodeSize); + startOffset -= uwiComp->info.compTotalHotCodeSize; + endOffset -= uwiComp->info.compTotalHotCodeSize; + } + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("unwindEmit: fragment #%d:\n", ufiNum); + } +#endif // DEBUG + + uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock, + funKind); +} + +#ifdef DEBUG +void UnwindFragmentInfo::Dump(int indent) +{ + unsigned count; + UnwindEpilogInfo* pEpi; + + count = 0; + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++count; + } + + printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext)); + printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc)); + printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog)); + printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? "s" : ""); + printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList)); + printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast)); + printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes)); + printf("%*s ufiSize: %u\n", indent, "", ufiSize); + printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit)); + printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount)); + printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords); + printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes); + printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset); + printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog)); + printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized); + + ufiPrologCodes.Dump(indent + 2); + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->Dump(indent + 2); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindInfo +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc) +{ + uwiComp = comp; + + // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated. + // However, its constructor needs to be explicitly called, since the constructor for + // UnwindInfo is not called. 
+
+    new (&uwiFragmentFirst, jitstd::placement_t()) UnwindFragmentInfo(comp, startLoc, false);
+
+    uwiFragmentLast = &uwiFragmentFirst;
+
+    uwiEndLoc = endLoc;
+
+    // Allocate an emitter location object. It is initialized to something
+    // invalid: it has a null 'ig' that needs to get set before it can be used.
+    // Note that when we create an UnwindInfo for the cold section, this never
+    // gets initialized with anything useful, since we never add unwind codes
+    // to the cold section; we simply distribute the existing (previously added) codes.
+    uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation();
+
+#ifdef DEBUG
+    uwiInitialized = UWI_INITIALIZED_PATTERN;
+    uwiAddingNOP   = false;
+#endif // DEBUG
+}
+
+// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi')
+// and those that are in the cold section (move them to 'this'). There is exactly one fragment
+// in each UnwindInfo; the fragments haven't been split for size, yet.
+
+void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi)
+{
+    // Ensure that there is exactly a single fragment in both the hot and the cold sections
+    assert(&uwiFragmentFirst == uwiFragmentLast);
+    assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast);
+    assert(uwiFragmentLast->ufiNext == NULL);
+    assert(puwi->uwiFragmentLast->ufiNext == NULL);
+
+    // The real prolog is in the hot section, so this (cold) section has a phantom prolog
+    uwiFragmentLast->ufiHasPhantomProlog = true;
+    uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast);
+
+    // Now split the epilog codes
+    uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast);
+}
+
+// Split the function or funclet into fragments that are no larger than 512K,
+// so the fragment size will fit in the unwind data "Function Length" field.
+// The unwind data format used here is modeled on the ARM64-style exception data format,
+// whose "Function Fragments" section describes this kind of splitting. We split the function
+// so that no fragment is larger than 512K bytes, or the value of the COMPlus_JitSplitFunctionSize
+// value, if defined (and smaller). We must determine how to split the function/funclet before we
+// issue the instructions, so we can reserve the unwind space with the VM. The instructions issued
+// may shrink (but not expand!) during issuing (although this is extremely rare in any case, and
+// may not actually occur on LoongArch64), so we don't finalize actual sizes or offsets.
+//
+// Note that the "Function Length" encoding used by Finalize (18 bits of 4-byte units) could
+// describe fragments of up to 1MB, so the 512K limit is conservative.
+// TODO-LOONGARCH64-Bug?: make sure this works!
+//
+// We don't split any prolog or epilog. Ideally, we might not split an instruction,
+// although that doesn't matter because the unwind at any point would still be
+// well-defined.
+
+void UnwindInfo::Split()
+{
+    UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes
+
+    maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES;
+
+#ifdef DEBUG
+    // Consider COMPlus_JitSplitFunctionSize
+    unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize();
+
+    if (splitFunctionSize != 0)
+        if (splitFunctionSize < maxFragmentSize)
+            maxFragmentSize = splitFunctionSize;
+#endif // DEBUG
+
+    // Now, there should be exactly one fragment.
+
+    assert(uwiFragmentLast != NULL);
+    assert(uwiFragmentLast == &uwiFragmentFirst);
+    assert(uwiFragmentLast->ufiNext == NULL);
+
+    // Find the code size of this function/funclet.
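For a concrete feel of the split decision described above: with a 512K fragment limit, a 1.25MB function needs three fragments. The round-up division matches the computation that follows (sizes illustrative).

```cpp
#include <cstdio>

// Round-up division used to pick the fragment count, as in UnwindInfo::Split.
static unsigned FragmentCount(unsigned codeSize, unsigned maxFragmentSize)
{
    return (codeSize + maxFragmentSize - 1) / maxFragmentSize;
}

int main()
{
    const unsigned maxFragmentSize = 512 * 1024; // illustrative default limit

    printf("300KB  function -> %u fragment(s)\n", FragmentCount(300 * 1024, maxFragmentSize));  // 1
    printf("1.25MB function -> %u fragment(s)\n", FragmentCount(1280 * 1024, maxFragmentSize)); // 3
    return 0;
}
```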
+ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + if (uwiFragmentLast->ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + startOffset = 0; + } + else + { + startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + if (uwiEndLoc == NULL) + { + // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions + // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks. + // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late + // for us, since we need to decide how many fragments we need before the code memory is allocated + // (which is before instruction issuing). + UNATIVE_OFFSET estimatedTotalCodeSize = + uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize; + assert(estimatedTotalCodeSize != 0); + endOffset = estimatedTotalCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + assert(endOffset > startOffset); // there better be at least 1 byte of code + codeSize = endOffset - startOffset; + + // Now that we know the code size for this section (main function hot or cold, or funclet), + // figure out how many fragments we're going to need. + + UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up + assert(numberOfFragments > 0); + + if (numberOfFragments == 1) + { + // No need to split; we're done + return; + } + + // Now, we're going to commit to splitting the function into "numberOfFragments" fragments, + // for the purpose of unwind information. We need to do the actual splits so we can figure out + // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know + // the actual offsets of the splits since we haven't issued the instructions yet, so store + // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase, + // like we do for the function length and epilog offsets. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (uwiComp->verbose) + { + printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n", + numberOfFragments, codeSize, maxFragmentSize); + } +#endif // DEBUG + + // Call the emitter to do the split, and call us back for every split point it chooses. + uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, + EmitSplitCallback); + +#ifdef DEBUG + // Did the emitter split the function/funclet into as many fragments as we asked for? + // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not + // be fewer if we're splitting into 512K blocks! + + unsigned fragCount = 0; + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++fragCount; + } + if (fragCount < numberOfFragments) + { + if (uwiComp->verbose) + { + printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount); + } + + // If this fires, then we split into fewer fragments than we asked for, and we are using + // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit + // this fragment into the unwind data! 
If you set COMPlus_JitSplitFunctionSize to something + // small, we might not be able to split into as many fragments as asked for, because we + // can't split prologs or epilogs. + assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES); + } +#endif // DEBUG +} + +/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) +{ + UnwindInfo* puwi = (UnwindInfo*)context; + puwi->AddFragment(emitLoc); +} + +// Reserve space for the unwind info for all fragments + +void UnwindInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(isHotCode || !isFunclet); + + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Reserve(isFunclet, isHotCode); + } +} + +// Allocate and populate VM unwind info for all fragments + +void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + + UnwindFragmentInfo* pFrag; + + // First, finalize all the offsets (the location of the beginning of fragments, and epilogs), + // so a fragment can use the finalized offset of the subsequent fragment to determine its code size. + + UNATIVE_OFFSET endOffset; + + if (uwiEndLoc == NULL) + { + assert(uwiComp->info.compNativeCodeSize != 0); + endOffset = uwiComp->info.compNativeCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->FinalizeOffset(); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode); + } +} + +void UnwindInfo::AddEpilog() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + uwiFragmentLast->AddEpilog(); + CaptureLocation(); +} + +void UnwindInfo::CaptureLocation() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiCurLoc != NULL); + uwiCurLoc->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindInfo::AddFragment(emitLocation* emitLoc) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + + UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true); + +#ifdef DEBUG + newFrag->ufiNum = uwiFragmentLast->ufiNum + 1; +#endif // DEBUG + + newFrag->CopyPrologCodes(&uwiFragmentFirst); + newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast); + + // Link the new fragment in at the end of the fragment list + uwiFragmentLast->ufiNext = newFrag; + uwiFragmentLast = newFrag; +} + +#ifdef DEBUG + +void UnwindInfo::Dump(bool isHotCode, int indent) +{ + unsigned count; + UnwindFragmentInfo* pFrag; + + count = 0; + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++count; + } + + printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast)); + printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc)); + printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized); + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Dump(indent + 2); + } +} + +#endif // DEBUG + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 529c6538699f5..3364c84a1d859 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -227,6 +227,17 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regNamesFloat[reg]; +#elif defined(TARGET_LOONGARCH64) + + static const char* regNamesFloat[] = { +#define REGDEF(name, rnum, mask, sname) sname, +#include "register.h" + }; + + assert((unsigned)reg < ArrLen(regNamesFloat)); + + return regNamesFloat[reg]; + #else static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, @@ -316,6 +327,14 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) } #elif defined(TARGET_X86) // No register ranges + +#elif defined(TARGET_LOONGARCH64) + if (REG_A0 <= regNum && regNum <= REG_T8) + { + regHead = regNum; + inRegRange = true; + sep = "-"; + } #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -325,10 +344,12 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // We've already printed a register. Is this the end of a range? else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP -#else // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + else if ((regNum == REG_INT_LAST) || (regNum == REG_A7) || (regNum == REG_T8)) +#else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? else if (regNum == REG_INT_LAST) -#endif // TARGET_ARM64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 27d0059552241..d4833dce961f9 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -49,13 +49,13 @@ struct FloatTraits // Notes: // "Default" NaN value returned by expression 0.0f / 0.0f on x86/x64 has // different binary representation (0xffc00000) than NaN on - // ARM32/ARM64 (0x7fc00000). + // ARM32/ARM64/LoongArch64 (0x7fc00000). static float NaN() { #if defined(TARGET_XARCH) unsigned bits = 0xFFC00000u; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned bits = 0x7FC00000u; #else #error Unsupported or unset target architecture @@ -75,13 +75,13 @@ struct DoubleTraits // Notes: // "Default" NaN value returned by expression 0.0 / 0.0 on x86/x64 has // different binary representation (0xfff8000000000000) than NaN on - // ARM32/ARM64 (0x7ff8000000000000). + // ARM32/ARM64/LoongArch64 (0x7ff8000000000000). static double NaN() { #if defined(TARGET_XARCH) unsigned long long bits = 0xFFF8000000000000ull; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned long long bits = 0x7FF8000000000000ull; #else #error Unsupported or unset target architecture @@ -106,7 +106,7 @@ struct DoubleTraits template TFp FpAdd(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is negative infinity and [value2] is positive infinity // the result is NaN. 
// If [value1] is positive infinity and [value2] is negative infinity @@ -124,7 +124,7 @@ TFp FpAdd(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 + value2; } @@ -142,7 +142,7 @@ TFp FpAdd(TFp value1, TFp value2) template TFp FpSub(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is positive infinity and [value2] is positive infinity // the result is NaN. // If [value1] is negative infinity and [value2] is negative infinity @@ -160,7 +160,7 @@ TFp FpSub(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 - value2; } @@ -178,7 +178,7 @@ TFp FpSub(TFp value1, TFp value2) template TFp FpMul(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [value1] is zero and [value2] is infinity @@ -194,7 +194,7 @@ TFp FpMul(TFp value1, TFp value2) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 * value2; } @@ -212,7 +212,7 @@ TFp FpMul(TFp value1, TFp value2) template TFp FpDiv(TFp dividend, TFp divisor) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [dividend] is zero and [divisor] is zero @@ -228,7 +228,7 @@ TFp FpDiv(TFp dividend, TFp divisor) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return dividend / divisor; } @@ -9767,7 +9767,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet())); } -#if defined(FEATURE_READYTORUN) && defined(TARGET_ARMARCH) +#if defined(FEATURE_READYTORUN) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (call->IsR2RRelativeIndir()) { #ifdef DEBUG @@ -9782,7 +9782,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN // in morph. So we do not need to use EntryPointAddrAsArg0, because arg0 is already an entry point addr. useEntryPointAddrAsArg0 = false; } -#endif // FEATURE_READYTORUN && TARGET_ARMARCH +#endif // FEATURE_READYTORUN && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (nArgs == 0) { diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index cc88c400a5a33..32f17c685e137 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -179,6 +179,10 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #elif defined (TARGET_ARM) // No Hardware Intrinsics on ARM32 + +#elif defined (TARGET_LOONGARCH64) + //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. + #else #error Unsupported platform #endif
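A standalone sketch of the cross-platform constant-folding concern addressed in valuenum.cpp above: when folding infinity arithmetic for ARM32/ARM64/LoongArch64 targets, the JIT must produce the target's "default NaN" bit pattern (0x7fc00000 for float) rather than whatever NaN the host FPU happens to produce. Illustrative only; it does not use the JIT's FloatTraits types.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

// Fold value1 + value2 the way FpAdd does for ARM/LoongArch64 targets: if the operands are
// opposite infinities, return the target's default NaN instead of relying on the host FPU.
static float FoldAddForArmLikeTarget(float value1, float value2)
{
    const float inf = std::numeric_limits<float>::infinity();
    if ((value1 == inf && value2 == -inf) || (value1 == -inf && value2 == inf))
    {
        uint32_t bits = 0x7FC00000u; // default NaN on ARM32/ARM64/LoongArch64
        float    result;
        memcpy(&result, &bits, sizeof(result));
        return result;
    }
    return value1 + value2;
}

int main()
{
    float r = FoldAddForArmLikeTarget(std::numeric_limits<float>::infinity(),
                                      -std::numeric_limits<float>::infinity());

    uint32_t bits;
    memcpy(&bits, &r, sizeof(bits));
    printf("+inf + -inf folds to bit pattern 0x%08x\n", bits);
    return 0;
}
```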