From 7d7e5d105fe5bde1dba2550ed29dc73fd6ebbbd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Petryka?= <paprikapolishgamer@gmail.com>
Date: Fri, 23 Feb 2024 16:21:06 +0100
Subject: [PATCH 01/14] Merge with main: rewrite math JIT helpers to managed
 code and fix asserts.

---
 src/coreclr/inc/corinfo.h                     |  10 +-
 src/coreclr/inc/jithelpers.h                  |  34 +-
 src/coreclr/jit/gentree.cpp                   |   1 +
 src/coreclr/jit/morph.cpp                     |   4 +-
 src/coreclr/jit/utils.cpp                     |   4 -
 src/coreclr/jit/valuenum.cpp                  |  12 -
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp |  88 +---
 .../Runtime/CompilerHelpers/MathHelpers.cs    | 245 +---------
 .../Common/TypeSystem/IL/HelperExtensions.cs  |   8 +
 .../ILCompiler.Compiler/Compiler/JitHelper.cs |  62 ++-
 .../IL/ILImporter.Scanner.cs                  |  48 +-
 src/coreclr/vm/corelib.h                      |  23 +-
 src/coreclr/vm/ecall.cpp                      |  56 +++
 src/coreclr/vm/i386/jithelp.S                 |  81 ----
 src/coreclr/vm/i386/jithelp.asm               | 212 ---------
 src/coreclr/vm/i386/jitinterfacex86.cpp       |  45 --
 src/coreclr/vm/jithelpers.cpp                 | 429 ++----------------
 src/coreclr/vm/jitinterface.cpp               |  15 +-
 src/coreclr/vm/jitinterface.h                 |  11 -
 .../System.Private.CoreLib/src/System/Math.cs | 246 ++++++++++
 .../src/System/MathF.cs                       |  23 +
 21 files changed, 525 insertions(+), 1132 deletions(-)

diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index 5fad5e4b2429e..b4417e60af4d5 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -387,18 +387,18 @@ enum CorInfoHelpFunc
     CORINFO_HELP_ULMOD,
     CORINFO_HELP_LNG2DBL,               // Convert a signed int64 to a double
     CORINFO_HELP_ULNG2DBL,              // Convert a unsigned int64 to a double
-    CORINFO_HELP_DBL2INT,
+    CORINFO_HELP_DBL2INT,               // unused
     CORINFO_HELP_DBL2INT_OVF,
     CORINFO_HELP_DBL2LNG,
     CORINFO_HELP_DBL2LNG_OVF,
-    CORINFO_HELP_DBL2UINT,
+    CORINFO_HELP_DBL2UINT,              // unused
     CORINFO_HELP_DBL2UINT_OVF,
     CORINFO_HELP_DBL2ULNG,
     CORINFO_HELP_DBL2ULNG_OVF,
     CORINFO_HELP_FLTREM,
     CORINFO_HELP_DBLREM,
-    CORINFO_HELP_FLTROUND,
-    CORINFO_HELP_DBLROUND,
+    CORINFO_HELP_FLTROUND,              // unused
+    CORINFO_HELP_DBLROUND,              // unused
 
     /* Allocating a new object. Always use ICorClassInfo::getNewHelper() to decide
        which is the right helper to use to allocate an object of a given type. */
@@ -2061,7 +2061,7 @@ class ICorStaticInfo
     // Example of a scenario addressed by notifyMethodInfoUsage:
     //  1) Crossgen (with --opt-cross-module=MyLib) attempts to inline a call from MyLib.dll into MyApp.dll
     //     and realizes that the call always throws.
-    //  2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is 
+    //  2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is
     //     replaced with a breakpoint instruction that is expected to be unreachable.
     //  3) MyLib is updated to a new version so it's no longer within the same version bubble with MyApp.dll
     //     and the new version of the call no longer throws and does some work.
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index a0982f3ac6520..e0f65ff5de908 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -35,8 +35,6 @@
     JITHELPER(CORINFO_HELP_UDIV,                JIT_UDiv,           CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_UMOD,                JIT_UMod,           CORINFO_HELP_SIG_8_STACK)
 
-    // CORINFO_HELP_DBL2INT, CORINFO_HELP_DBL2UINT, and CORINFO_HELP_DBL2LONG get
-    // patched for CPUs that support SSE2 (P4 and above).
 #ifndef TARGET_64BIT
     JITHELPER(CORINFO_HELP_LLSH,                JIT_LLsh,           CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_LRSH,                JIT_LRsh,           CORINFO_HELP_SIG_REG_ONLY)
@@ -47,26 +45,26 @@
     JITHELPER(CORINFO_HELP_LRSZ,                NULL,               CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB)
 #endif // TARGET_64BIT
     JITHELPER(CORINFO_HELP_LMUL,                JIT_LMul,           CORINFO_HELP_SIG_16_STACK)
-    JITHELPER(CORINFO_HELP_LMUL_OVF,            JIT_LMulOvf,        CORINFO_HELP_SIG_16_STACK)
-    JITHELPER(CORINFO_HELP_ULMUL_OVF,           JIT_ULMulOvf,       CORINFO_HELP_SIG_16_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_LMUL_OVF,     NULL,               CORINFO_HELP_SIG_16_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_ULMUL_OVF,    NULL,               CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_LDIV,                JIT_LDiv,           CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_LMOD,                JIT_LMod,           CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_ULDIV,               JIT_ULDiv,          CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_ULMOD,               JIT_ULMod,          CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_LNG2DBL,             JIT_Lng2Dbl,        CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_ULNG2DBL,            JIT_ULng2Dbl,       CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2INT,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBL2INT_OVF,         JIT_Dbl2IntOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBL2LNG_OVF,         JIT_Dbl2LngOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBL2UINT_OVF,        JIT_Dbl2UIntOvf,    CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBL2ULNG,            JIT_Dbl2ULng,       CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBL2ULNG_OVF,        JIT_Dbl2ULngOvf,    CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_FLTREM,              JIT_FltRem,         CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBLREM,              JIT_DblRem,         CORINFO_HELP_SIG_16_STACK)
-    JITHELPER(CORINFO_HELP_FLTROUND,            JIT_FloatRound,     CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBLROUND,            JIT_DoubleRound,    CORINFO_HELP_SIG_16_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_ULNG2DBL,     NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2INT,      NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2INT_OVF,  NULL,               CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2LNG,             JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF,  NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG,     NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_FLTREM,       NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBLREM,       NULL,               CORINFO_HELP_SIG_16_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_FLTROUND,     NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBLROUND,     NULL,               CORINFO_HELP_SIG_16_STACK)
 
     // Allocating a new object
     JITHELPER(CORINFO_HELP_NEWFAST,                     JIT_New,    CORINFO_HELP_SIG_REG_ONLY)
@@ -203,7 +201,7 @@
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE,              JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR,          JIT_GetSharedGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR,       JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
-    JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS,    JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)    
+    JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS,    JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedNonGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED,    JIT_GetSharedGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED,       JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY)
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 489d3f42c4c0a..ffb461ec4d829 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -5969,6 +5969,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                 switch (tree->AsIntrinsic()->gtIntrinsicName)
                 {
                     case NI_System_Math_Atan2:
+                    case NI_System_Math_FMod:
                     case NI_System_Math_Pow:
                         // These math intrinsics are actually implemented by user calls. Increase the
                         // Sethi 'complexity' by two to reflect the argument register requirement.
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 43fb58c0f8837..0e7edf4f15902 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -377,7 +377,9 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #if defined(TARGET_ARM) || defined(TARGET_AMD64)
                         return nullptr;
 #else  // TARGET_X86
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
+                        oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
+                        tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT);
+                        return fgMorphTree(tree);
 #endif // TARGET_X86
 
                     case TYP_LONG:
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index aed8cda7c24df..5b9e74e91b242 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1536,14 +1536,10 @@ void HelperCallProperties::init()
             case CORINFO_HELP_LMUL:
             case CORINFO_HELP_LNG2DBL:
             case CORINFO_HELP_ULNG2DBL:
-            case CORINFO_HELP_DBL2INT:
             case CORINFO_HELP_DBL2LNG:
-            case CORINFO_HELP_DBL2UINT:
             case CORINFO_HELP_DBL2ULNG:
             case CORINFO_HELP_FLTREM:
             case CORINFO_HELP_DBLREM:
-            case CORINFO_HELP_FLTROUND:
-            case CORINFO_HELP_DBLROUND:
 
                 isPure  = true;
                 noThrow = true;
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 79cc554835540..4a8ca85aa3e58 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -12759,11 +12759,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
             srcIsUnsigned = true;
             break;
 
-        case CORINFO_HELP_DBL2INT:
-            castToType   = TYP_INT;
-            castFromType = TYP_DOUBLE;
-            break;
-
         case CORINFO_HELP_DBL2INT_OVF:
             castToType       = TYP_INT;
             castFromType     = TYP_DOUBLE;
@@ -12781,11 +12776,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
             hasOverflowCheck = true;
             break;
 
-        case CORINFO_HELP_DBL2UINT:
-            castToType   = TYP_UINT;
-            castFromType = TYP_DOUBLE;
-            break;
-
         case CORINFO_HELP_DBL2UINT_OVF:
             castToType       = TYP_UINT;
             castFromType     = TYP_DOUBLE;
@@ -13091,11 +13081,9 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
     {
         case CORINFO_HELP_LNG2DBL:
         case CORINFO_HELP_ULNG2DBL:
-        case CORINFO_HELP_DBL2INT:
         case CORINFO_HELP_DBL2INT_OVF:
         case CORINFO_HELP_DBL2LNG:
         case CORINFO_HELP_DBL2LNG_OVF:
-        case CORINFO_HELP_DBL2UINT:
         case CORINFO_HELP_DBL2UINT_OVF:
         case CORINFO_HELP_DBL2ULNG:
         case CORINFO_HELP_DBL2ULNG_OVF:
diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index 930c200a34441..6491813e3ed4e 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -5,78 +5,13 @@
 #include "CommonMacros.h"
 #include "rhassert.h"
 
-//
-// Floating point and 64-bit integer math helpers.
-//
-
-EXTERN_C uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
-{
-    const double two63  = 2147483648.0 * 4294967296.0;
-    uint64_t ret;
-    if (val < two63)
-    {
-        ret = (int64_t)(val);
-    }
-    else
-    {
-        // subtract 0x8000000000000000, do the convert then add it back again
-        ret = (int64_t)(val - two63) + I64(0x8000000000000000);
-    }
-    return ret;
-}
-
 #undef min
 #undef max
 #include <cmath>
 
-EXTERN_C float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor)
-{
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-
-    if (divisor==0 || !std::isfinite(dividend))
-    {
-        return -nanf("");
-    }
-    else if (!std::isfinite(divisor) && !std::isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return fmodf(dividend,divisor);
-}
-
-EXTERN_C double REDHAWK_CALLCONV RhpDblRem(double dividend, double divisor)
-{
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-    if (divisor==0 || !std::isfinite(dividend))
-    {
-        return -nan("");
-    }
-    else if (!std::isfinite(divisor) && !std::isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return(fmod(dividend,divisor));
-}
+//
+// Floating point and 64-bit integer math helpers.
+//
 
 #ifdef HOST_ARM
 EXTERN_C int32_t REDHAWK_CALLCONV RhpIDiv(int32_t i, int32_t j)
@@ -152,22 +87,7 @@ EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
     return (int64_t)val;
 }
 
-EXTERN_C int32_t REDHAWK_CALLCONV RhpDbl2Int(double val)
-{
-    return (int32_t)val;
-}
-
-EXTERN_C uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val)
-{
-    return (uint32_t)val;
-}
-
-EXTERN_C double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val)
-{
-    return (double)val;
-}
-
-EXTERN_C double REDHAWK_CALLCONV RhpULng2Dbl(uint64_t val)
+EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val)
 {
     return (double)val;
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
index 7175ea9c00cbe..b6930717ab0b7 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
@@ -5,8 +5,6 @@
 using System.Runtime;
 using System.Runtime.CompilerServices;
 
-using Internal.Runtime;
-
 namespace Internal.Runtime.CompilerHelpers
 {
     /// <summary>
@@ -16,136 +14,10 @@ namespace Internal.Runtime.CompilerHelpers
     internal static class MathHelpers
     {
 #if !TARGET_64BIT
-        //
-        // 64-bit checked multiplication for 32-bit platforms
-        //
-
         private const string RuntimeLibrary = "*";
 
-        // Helper to multiply two 32-bit uints
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static ulong Mul32x32To64(uint a, uint b)
-        {
-            return a * (ulong)b;
-        }
-
-        // Helper to get high 32-bit of 64-bit int
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static uint Hi32Bits(long a)
-        {
-            return (uint)(a >> 32);
-        }
-
-        // Helper to get high 32-bit of 64-bit int
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static uint Hi32Bits(ulong a)
-        {
-            return (uint)(a >> 32);
-        }
-
-        [RuntimeExport("LMulOvf")]
-        public static long LMulOvf(long i, long j)
-        {
-            long ret;
-
-            // Remember the sign of the result
-            int sign = (int)(Hi32Bits(i) ^ Hi32Bits(j));
-
-            // Convert to unsigned multiplication
-            if (i < 0) i = -i;
-            if (j < 0) j = -j;
-
-            // Get the upper 32 bits of the numbers
-            uint val1High = Hi32Bits(i);
-            uint val2High = Hi32Bits(j);
-
-            ulong valMid;
-
-            if (val1High == 0)
-            {
-                // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val2High, (uint)i);
-            }
-            else
-            {
-                if (val2High != 0)
-                    goto ThrowExcep;
-                // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val1High, (uint)j);
-            }
-
-            // See if any bits after bit 32 are set
-            if (Hi32Bits(valMid) != 0)
-                goto ThrowExcep;
-
-            ret = (long)(Mul32x32To64((uint)i, (uint)j) + (valMid << 32));
-
-            // check for overflow
-            if (Hi32Bits(ret) < (uint)valMid)
-                goto ThrowExcep;
-
-            if (sign >= 0)
-            {
-                // have we spilled into the sign bit?
-                if (ret < 0)
-                    goto ThrowExcep;
-            }
-            else
-            {
-                ret = -ret;
-                // have we spilled into the sign bit?
-                if (ret > 0)
-                    goto ThrowExcep;
-            }
-            return ret;
-
-        ThrowExcep:
-            return ThrowLngOvf();
-        }
-
-        [RuntimeExport("ULMulOvf")]
-        public static ulong ULMulOvf(ulong i, ulong j)
-        {
-            ulong ret;
-
-            // Get the upper 32 bits of the numbers
-            uint val1High = Hi32Bits(i);
-            uint val2High = Hi32Bits(j);
-
-            ulong valMid;
-
-            if (val1High == 0)
-            {
-                if (val2High == 0)
-                    return Mul32x32To64((uint)i, (uint)j);
-                // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val2High, (uint)i);
-            }
-            else
-            {
-                if (val2High != 0)
-                    goto ThrowExcep;
-                // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val1High, (uint)j);
-            }
-
-            // See if any bits after bit 32 are set
-            if (Hi32Bits(valMid) != 0)
-                goto ThrowExcep;
-
-            ret = Mul32x32To64((uint)i, (uint)j) + (valMid << 32);
-
-            // check for overflow
-            if (Hi32Bits(ret) < (uint)valMid)
-                goto ThrowExcep;
-            return ret;
-
-        ThrowExcep:
-            return ThrowULngOvf();
-        }
-
         [RuntimeImport(RuntimeLibrary, "RhpULMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern ulong RhpULMod(ulong i, ulong j);
 
         public static ulong ULMod(ulong i, ulong j)
@@ -157,7 +29,7 @@ public static ulong ULMod(ulong i, ulong j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpLMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern long RhpLMod(long i, long j);
 
         public static long LMod(long i, long j)
@@ -171,7 +43,7 @@ public static long LMod(long i, long j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpULDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern ulong RhpULDiv(ulong i, ulong j);
 
         public static ulong ULDiv(ulong i, ulong j)
@@ -183,7 +55,7 @@ public static ulong ULDiv(ulong i, ulong j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpLDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern long RhpLDiv(long i, long j);
 
         public static long LDiv(long i, long j)
@@ -196,94 +68,9 @@ public static long LDiv(long i, long j)
                 return RhpLDiv(i, j);
         }
 
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static long ThrowLngDivByZero()
-        {
-            throw new DivideByZeroException();
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static ulong ThrowULngDivByZero()
-        {
-            throw new DivideByZeroException();
-        }
-#endif // TARGET_64BIT
-
-        [RuntimeExport("Dbl2IntOvf")]
-        public static int Dbl2IntOvf(double val)
-        {
-            const double two31 = 2147483648.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -two31 - 1 && val < two31)
-                return unchecked((int)val);
-
-            return ThrowIntOvf();
-        }
-
-        [RuntimeExport("Dbl2UIntOvf")]
-        public static uint Dbl2UIntOvf(double val)
-        {
-            // Note that this expression also works properly for val = NaN case
-            if (val > -1.0 && val < 4294967296.0)
-                return unchecked((uint)val);
-
-            return ThrowUIntOvf();
-        }
-
-        [RuntimeExport("Dbl2LngOvf")]
-        public static long Dbl2LngOvf(double val)
-        {
-            const double two63 = 2147483648.0 * 4294967296.0;
-
-            // Note that this expression also works properly for val = NaN case
-            // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-            if (val > -two63 - 0x402 && val < two63)
-                return unchecked((long)val);
-
-            return ThrowLngOvf();
-        }
-
-        [RuntimeExport("Dbl2ULngOvf")]
-        public static ulong Dbl2ULngOvf(double val)
-        {
-            const double two64 = 2.0 * 2147483648.0 * 4294967296.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -1.0 && val < two64)
-                return unchecked((ulong)val);
-
-            return ThrowULngOvf();
-        }
-
-        [RuntimeExport("Flt2IntOvf")]
-        public static int Flt2IntOvf(float val)
-        {
-            const double two31 = 2147483648.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -two31 - 1 && val < two31)
-                return ((int)val);
-
-            return ThrowIntOvf();
-        }
-
-        [RuntimeExport("Flt2LngOvf")]
-        public static long Flt2LngOvf(float val)
-        {
-            const double two63 = 2147483648.0 * 4294967296.0;
-
-            // Note that this expression also works properly for val = NaN case
-            // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-            if (val > -two63 - 0x402 && val < two63)
-                return ((long)val);
-
-            return ThrowIntOvf();
-        }
-
 #if TARGET_ARM
         [RuntimeImport(RuntimeLibrary, "RhpIDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern int RhpIDiv(int i, int j);
 
         public static int IDiv(int i, int j)
@@ -297,7 +84,7 @@ public static int IDiv(int i, int j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpUDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern uint RhpUDiv(uint i, uint j);
 
         public static long UDiv(uint i, uint j)
@@ -309,7 +96,7 @@ public static long UDiv(uint i, uint j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpIMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern int RhpIMod(int i, int j);
 
         public static int IMod(int i, int j)
@@ -323,7 +110,7 @@ public static int IMod(int i, int j)
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpUMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern uint RhpUMod(uint i, uint j);
 
         public static long UMod(uint i, uint j)
@@ -339,32 +126,31 @@ public static long UMod(uint i, uint j)
         // Matching return types of throw helpers enables tailcalling them. It improves performance
         // of the hot path because of it does not need to raise full stackframe.
         //
-
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static int ThrowIntOvf()
+        private static long ThrowLngOvf()
         {
             throw new OverflowException();
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static uint ThrowUIntOvf()
+        private static long ThrowLngDivByZero()
         {
-            throw new OverflowException();
+            throw new DivideByZeroException();
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static long ThrowLngOvf()
+        private static ulong ThrowULngDivByZero()
         {
-            throw new OverflowException();
+            throw new DivideByZeroException();
         }
 
+#if TARGET_ARM
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static ulong ThrowULngOvf()
+        private static int ThrowIntOvf()
         {
             throw new OverflowException();
         }
 
-#if TARGET_ARM
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static int ThrowIntDivByZero()
         {
@@ -377,5 +163,6 @@ private static uint ThrowUIntDivByZero()
             throw new DivideByZeroException();
         }
 #endif // TARGET_ARM
+#endif // TARGET_64BIT
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs
index 4ccaff2d6dd9f..fa24787613574 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs
@@ -7,6 +7,7 @@
 using Internal.IL.Stubs;
 
 using Debug = System.Diagnostics.Debug;
+using System.Xml.Linq;
 
 namespace Internal.IL
 {
@@ -40,6 +41,13 @@ public static MethodDesc GetOptionalHelperEntryPoint(this TypeSystemContext cont
             return helperMethod;
         }
 
+        public static MethodDesc GetHelperEntryPoint(this TypeSystemContext context, string typeNamespace, string typeName, string methodName)
+        {
+            // Look the type up in the system module, then resolve the method by name only (null signature).
+            MetadataType owningType = context.SystemModule.GetKnownType(typeNamespace, typeName);
+            return owningType.GetKnownMethod(methodName, null);
+        }
+
         /// <summary>
         /// Emits a call to a throw helper. Use this to emit calls to static parameterless methods that don't return.
         /// The advantage of using this extension method is that you don't have to deal with what code to emit after
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index cf1d04ca666af..8d5b1357d9e3e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -164,54 +164,66 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     methodDesc = context.GetHelperEntryPoint("LdTokenHelpers", "GetRuntimeFieldHandle");
                     break;
 
-                case ReadyToRunHelper.Lng2Dbl:
-                    mangledName = "RhpLng2Dbl";
+                case ReadyToRunHelper.LMulOfv:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "LongMultiplyOverflow");
                     break;
-                case ReadyToRunHelper.ULng2Dbl:
-                    mangledName = "RhpULng2Dbl";
+                case ReadyToRunHelper.ULMulOvf:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "ULongMultiplyOverflow");
                     break;
 
-                case ReadyToRunHelper.Dbl2Lng:
-                    mangledName = "RhpDbl2Lng";
+                case ReadyToRunHelper.ULng2Dbl:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "ULongToDouble");
                     break;
                 case ReadyToRunHelper.Dbl2ULng:
-                    mangledName = "RhpDbl2ULng";
-                    break;
-                case ReadyToRunHelper.Dbl2Int:
-                    mangledName = "RhpDbl2Int";
-                    break;
-                case ReadyToRunHelper.Dbl2UInt:
-                    mangledName = "RhpDbl2UInt";
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong");
                     break;
 
                 case ReadyToRunHelper.Dbl2IntOvf:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2IntOvf");
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow");
                     break;
                 case ReadyToRunHelper.Dbl2UIntOvf:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2UIntOvf");
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToUIntOverflow");
                     break;
                 case ReadyToRunHelper.Dbl2LngOvf:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2LngOvf");
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToLongOverflow");
                     break;
                 case ReadyToRunHelper.Dbl2ULngOvf:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2ULngOvf");
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULongOverflow");
                     break;
 
                 case ReadyToRunHelper.DblRem:
-                    mangledName = "RhpDblRem";
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleReminder");
                     break;
                 case ReadyToRunHelper.FltRem:
-                    mangledName = "RhpFltRem";
+                    methodDesc = context.GetHelperEntryPoint("System", "MathF", "FloatReminder");
                     break;
 
-                case ReadyToRunHelper.LMul:
-                    mangledName = "RhpLMul";
+                case ReadyToRunHelper.Dbl2Int:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt");
                     break;
-                case ReadyToRunHelper.LMulOfv:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "LMulOvf");
+                case ReadyToRunHelper.Dbl2UInt:
+                    methodDesc = context.GetHelperEntryPoint("System", "MathF", "DoubleToUInt");
                     break;
-                case ReadyToRunHelper.ULMulOvf:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "ULMulOvf");
+                case ReadyToRunHelper.DblRound:
+                    DefType doubleType = context.GetWellKnownType(WellKnownType.Double);
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("Round",
+                        new MethodSignature(MethodSignatureFlags.Static, 0, doubleType, [doubleType]));
+                    break;
+                case ReadyToRunHelper.FltRound:
+                    DefType floatType = context.GetWellKnownType(WellKnownType.Single);
+                    methodDesc = context.SystemModule.GetKnownType("System", "MathF").GetKnownMethod("Round",
+                        new MethodSignature(MethodSignatureFlags.Static, 0, floatType, [floatType]));
+                    break;
+
+                case ReadyToRunHelper.Lng2Dbl:
+                    mangledName = "RhpLng2Dbl";
+                    break;
+                case ReadyToRunHelper.Dbl2Lng:
+                    mangledName = "RhpDbl2Lng";
+                    break;
+
+                case ReadyToRunHelper.LMul:
+                    mangledName = "RhpLMul";
                     break;
 
                 case ReadyToRunHelper.Mod:
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
index 7ae867370bdf4..e152de9cb404a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System;
 using Internal.TypeSystem;
 using Internal.ReadyToRunConstants;
 
@@ -9,6 +10,7 @@
 
 using Debug = System.Diagnostics.Debug;
 using DependencyList = ILCompiler.DependencyAnalysisFramework.DependencyNodeCore<ILCompiler.DependencyAnalysis.NodeFactory>.DependencyList;
+using System.Reflection.Emit;
 
 #pragma warning disable IDE0060
 
@@ -1259,6 +1261,51 @@ private void ImportBinaryOperation(ILOpcode opcode)
                     {
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ThrowDivZero), "_divbyzero");
                     }
+                    _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.DblRem), "rem");
+                    _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.FltRem), "rem");
+                    break;
+            }
+        }
+
+        private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool unsigned) // records conversion helpers as dependencies; 'unsigned' is not read here
+        {
+            switch (wellKnownType) // the helper to record is chosen from this type
+            {
+                case WellKnownType.SByte:
+                case WellKnownType.Int16:
+                case WellKnownType.Int32:
+                    if (checkOverflow) // only the overflow-checked form records a helper for this group
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf");
+                    }
+                    break;
+                case WellKnownType.Int64:
+                    if (checkOverflow)
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2LngOvf), "conv_i8_ovf");
+                    }
+                    break;
+                case WellKnownType.Byte:
+                case WellKnownType.UInt16:
+                case WellKnownType.UInt32:
+                    if (checkOverflow)
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u4_ovf");
+                    }
+                    break;
+                case WellKnownType.UInt64:
+                    if (checkOverflow)
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2ULngOvf), "conv_u8_ovf");
+                    }
+                    else // the unchecked form also records a helper for this type
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2ULng), "conv_u8");
+                    }
+                    break;
+                case WellKnownType.Single:
+                case WellKnownType.Double:
+                    _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ULng2Dbl), "conv_r"); // recorded regardless of checkOverflow
                     break;
             }
         }
@@ -1388,7 +1435,6 @@ private static void ImportStoreIndirect(int token) { }
         private static void ImportStoreIndirect(TypeDesc type) { }
         private static void ImportShiftOperation(ILOpcode opcode) { }
         private static void ImportCompareOperation(ILOpcode opcode) { }
-        private static void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool unsigned) { }
         private static void ImportUnaryOperation(ILOpcode opCode) { }
         private static void ImportCpOpj(int token) { }
         private static void ImportCkFinite() { }
diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h
index c52c58954165a..eef4d60b64b87 100644
--- a/src/coreclr/vm/corelib.h
+++ b/src/coreclr/vm/corelib.h
@@ -259,11 +259,28 @@ DEFINE_FIELD(DELEGATE,            METHOD_PTR_AUX,         _methodPtrAux)
 DEFINE_METHOD(DELEGATE,             CONSTRUCT_DELEGATE,     DelegateConstruct,          IM_Obj_IntPtr_RetVoid)
 DEFINE_METHOD(DELEGATE,             GET_INVOKE_METHOD,      GetInvokeMethod,            IM_RetIntPtr)
 
-DEFINE_CLASS(INT128,               System,                 Int128)
-DEFINE_CLASS(UINT128,              System,                 UInt128)
+DEFINE_CLASS(INT128,                System,                 Int128)
+DEFINE_CLASS(UINT128,               System,                 UInt128)
+
+DEFINE_CLASS(MATH,                  System,                 Math)
+DEFINE_METHOD(MATH,                 LONG_MULTIPLY_OVERFLOW, LongMultiplyOverflow,       NoSig)
+DEFINE_METHOD(MATH,                 ULONG_MULTIPLY_OVERFLOW,ULongMultiplyOverflow,      NoSig)
+DEFINE_METHOD(MATH,                 ULONG_TO_DOUBLE,        ULongToDouble,              NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_ULONG,        DoubleToULong,              NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_INT_OVERFLOW, DoubleToIntOverflow,        NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_UINT_OVERFLOW,DoubleToUIntOverflow,       NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_LONG_OVERFLOW,DoubleToLongOverflow,       NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverflow,     NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_REMINDER,        DoubleReminder,             NoSig) // NOTE(review): presumably meant "Remainder"; rename together with the managed method
+DEFINE_METHOD(MATH,                 DOUBLE_TO_INT,          DoubleToInt,                NoSig)
+DEFINE_METHOD(MATH,                 DOUBLE_TO_UINT,         DoubleToUInt,               NoSig)
+DEFINE_METHOD(MATH,                 ROUND,                  Round,                      SM_Dbl_RetDbl)
+
+DEFINE_CLASS(MATHF,                 System,                 MathF)
+DEFINE_METHOD(MATHF,                FLOAT_REMINDER,         FloatReminder,              NoSig) // NOTE(review): presumably meant "Remainder"; rename together with the managed method
+DEFINE_METHOD(MATHF,                ROUND,                  Round,                      SM_Flt_RetFlt)
 
 DEFINE_CLASS(DYNAMICMETHOD,         ReflectionEmit,         DynamicMethod)
-
 DEFINE_CLASS(DYNAMICRESOLVER,       ReflectionEmit,         DynamicResolver)
 DEFINE_FIELD(DYNAMICRESOLVER,       DYNAMIC_METHOD,         m_method)
 
diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp
index 7a9538d8ea7dd..fa8cf590c28d6 100644
--- a/src/coreclr/vm/ecall.cpp
+++ b/src/coreclr/vm/ecall.cpp
@@ -156,6 +156,62 @@ void ECall::PopulateManagedHelpers()
     pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMCOPY));
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__LONG_MULTIPLY_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_LMUL_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ULONG_MULTIPLY_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_ULMUL_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ULONG_TO_DOUBLE));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_ULNG2DBL, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_ULONG));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2ULNG, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_INT_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2INT_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_UINT_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2UINT_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_LONG_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2LNG_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_ULONG_OVERFLOW));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2ULNG_OVF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_REMINDER));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBLREM, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATHF__FLOAT_REMINDER));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_FLTREM, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_INT));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2INT, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_UINT));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATHF__ROUND));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_FLTROUND, pDest);
 }
 
 static CrstStatic gFCallLock;
diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S
index c1da6f4dcb801..d027525202781 100644
--- a/src/coreclr/vm/i386/jithelp.S
+++ b/src/coreclr/vm/i386/jithelp.S
@@ -551,87 +551,6 @@ LOCAL_LABEL(LRszMORE32):
     ret
 LEAF_END JIT_LRsz, _TEXT
 
-// *********************************************************************/
-//  JIT_Dbl2LngP4x87
-//
-// Purpose:
-//   converts a double to a long truncating toward zero (C semantics)
-//
-// uses stdcall calling conventions
-//
-//   This code is faster on a P4 than the Dbl2Lng code above, but is
-//   slower on a PIII.  Hence we choose this code when on a P4 or above.
-//
-LEAF_ENTRY JIT_Dbl2LngP4x87, _TEXT
-    // get some local space
-    sub 	esp, 8
-
-    #define arg1 [esp + 0x0C]
-    fld     QWORD PTR arg1          // fetch arg
-    fnstcw  WORD PTR arg1           // store FPCW
-    movzx   eax, WORD PTR arg1      // zero extend - wide
-    or      ah, 0x0C                // turn on OE and DE flags
-    mov     DWORD PTR [esp], eax    // store new FPCW bits
-    fldcw   WORD PTR  [esp]         // reload FPCW with new bits
-    fistp   QWORD PTR [esp]         // convert
-
-    // reload FP result
-    mov	    eax, DWORD PTR [esp]
-    mov	    edx, DWORD PTR [esp + 4]
-
-    // reload original FPCW value
-    fldcw   WORD PTR arg1
-    #undef arg1
-
-    // restore stack
-    add     esp, 8
-
-    ret
-LEAF_END JIT_Dbl2LngP4x87, _TEXT
-
-// *********************************************************************/
-//  JIT_Dbl2LngSSE3
-//
-// Purpose:
-//    converts a double to a long truncating toward zero (C semantics)
-//
-//  uses stdcall calling conventions
-//
-//    This code is faster than the above P4 x87 code for Intel processors
-//    equal or later than Core2 and Atom that have SSE3 support
-//
-LEAF_ENTRY JIT_Dbl2LngSSE3, _TEXT
-    // get some local space
-    sub     esp, 8
-
-    fld     QWORD PTR [esp + 0x0C]   // fetch arg
-    fisttp  QWORD PTR [esp]         // convert
-    mov     eax, DWORD PTR [esp]    // reload FP result
-    mov     edx, DWORD PTR [esp + 4]
-
-    // restore stack
-    add     esp, 8
-
-    ret
-LEAF_END JIT_Dbl2LngSSE3, _TEXT
-
-// *********************************************************************/
-// JIT_Dbl2IntSSE2
-//
-// Purpose:
-//  converts a double to a long truncating toward zero (C semantics)
-//
-// uses stdcall calling conventions
-//
-// This code is even faster than the P4 x87 code for Dbl2LongP4x87,
-// but only returns a 32 bit value (only good for int).
-//
-LEAF_ENTRY JIT_Dbl2IntSSE2, _TEXT
-    movsd     xmm0, [esp + 4]
-    cvttsd2si eax, xmm0
-    ret
-LEAF_END JIT_Dbl2IntSSE2, _TEXT
-
 // *********************************************************************/
 // JIT_StackProbe
 //
diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm
index 5f6890b8312e0..b7446327d47f9 100644
--- a/src/coreclr/vm/i386/jithelp.asm
+++ b/src/coreclr/vm/i386/jithelp.asm
@@ -36,11 +36,7 @@ JIT_LLsh                        TEXTEQU <_JIT_LLsh@0>
 JIT_LRsh                        TEXTEQU <_JIT_LRsh@0>
 JIT_LRsz                        TEXTEQU <_JIT_LRsz@0>
 JIT_LMul                        TEXTEQU <@JIT_LMul@16>
-JIT_Dbl2LngOvf                  TEXTEQU <@JIT_Dbl2LngOvf@8>
 JIT_Dbl2Lng                     TEXTEQU <@JIT_Dbl2Lng@8>
-JIT_Dbl2IntSSE2                 TEXTEQU <@JIT_Dbl2IntSSE2@8>
-JIT_Dbl2LngP4x87                TEXTEQU <@JIT_Dbl2LngP4x87@8>
-JIT_Dbl2LngSSE3	                TEXTEQU <@JIT_Dbl2LngSSE3@8>
 JIT_InternalThrowFromHelper     TEXTEQU <@JIT_InternalThrowFromHelper@4>
 JIT_WriteBarrierReg_PreGrow     TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0>
 JIT_WriteBarrierReg_PostGrow    TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0>
@@ -635,181 +631,6 @@ LMul_hard:
 
 JIT_LMul ENDP
 
-;*********************************************************************/
-; JIT_Dbl2LngOvf
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;   with check for overflow
-;
-;       uses stdcall calling conventions
-;
-PUBLIC JIT_Dbl2LngOvf
-JIT_Dbl2LngOvf PROC
-        fnclex
-        fld     qword ptr [esp+4]
-        push    ecx
-        push    ecx
-        fstp    qword ptr [esp]
-        call    JIT_Dbl2Lng
-        mov     ecx,eax
-        fnstsw  ax
-        test    ax,01h
-        jnz     Dbl2LngOvf_throw
-        mov     eax,ecx
-        ret     8
-
-Dbl2LngOvf_throw:
-        mov     ECX, CORINFO_OverflowException_ASM
-        call    JIT_InternalThrowFromHelper
-        ret     8
-JIT_Dbl2LngOvf ENDP
-
-;*********************************************************************/
-; JIT_Dbl2Lng
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;       uses stdcall calling conventions
-;
-;   note that changing the rounding mode is very expensive.  This
-;   routine basiclly does the truncation semantics without changing
-;   the rounding mode, resulting in a win.
-;
-PUBLIC JIT_Dbl2Lng
-JIT_Dbl2Lng PROC
-        fld qword ptr[ESP+4]            ; fetch arg
-        lea ecx,[esp-8]
-        sub esp,16                      ; allocate frame
-        and ecx,-8                      ; align pointer on boundary of 8
-        fld st(0)                       ; duplciate top of stack
-        fistp qword ptr[ecx]            ; leave arg on stack, also save in temp
-        fild qword ptr[ecx]             ; arg, round(arg) now on stack
-        mov edx,[ecx+4]                 ; high dword of integer
-        mov eax,[ecx]                   ; low dword of integer
-        test eax,eax
-        je integer_QNaN_or_zero
-
-arg_is_not_integer_QNaN:
-        fsubp st(1),st                  ; TOS=d-round(d),
-                                        ; { st(1)=st(1)-st & pop ST }
-        test edx,edx                    ; what's sign of integer
-        jns positive
-                                        ; number is negative
-                                        ; dead cycle
-                                        ; dead cycle
-        fstp dword ptr[ecx]             ; result of subtraction
-        mov ecx,[ecx]                   ; dword of difference(single precision)
-        add esp,16
-        xor ecx,80000000h
-        add ecx,7fffffffh               ; if difference>0 then increment integer
-        adc eax,0                       ; inc eax (add CARRY flag)
-        adc edx,0                       ; propagate carry flag to upper bits
-        ret 8
-
-positive:
-        fstp dword ptr[ecx]             ;17-18 ; result of subtraction
-        mov ecx,[ecx]                   ; dword of difference (single precision)
-        add esp,16
-        add ecx,7fffffffh               ; if difference<0 then decrement integer
-        sbb eax,0                       ; dec eax (subtract CARRY flag)
-        sbb edx,0                       ; propagate carry flag to upper bits
-        ret 8
-
-integer_QNaN_or_zero:
-        test edx,7fffffffh
-        jnz arg_is_not_integer_QNaN
-        fstp st(0)                      ;; pop round(arg)
-        fstp st(0)                      ;; arg
-        add esp,16
-        ret 8
-JIT_Dbl2Lng ENDP
-
-;*********************************************************************/
-; JIT_Dbl2LngP4x87
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is faster on a P4 than the Dbl2Lng code above, but is
-;   slower on a PIII.  Hence we choose this code when on a P4 or above.
-;
-PUBLIC JIT_Dbl2LngP4x87
-JIT_Dbl2LngP4x87 PROC
-arg1	equ	<[esp+0Ch]>
-
-    sub 	esp, 8                  ; get some local space
-
-    fld	qword ptr arg1              ; fetch arg
-    fnstcw  word ptr arg1           ; store FPCW
-    movzx   eax, word ptr arg1      ; zero extend - wide
-    or	ah, 0Ch                     ; turn on OE and DE flags
-    mov	dword ptr [esp], eax        ; store new FPCW bits
-    fldcw   word ptr  [esp]         ; reload FPCW with new bits
-    fistp   qword ptr [esp]         ; convert
-    mov	eax, dword ptr [esp]        ; reload FP result
-    mov	edx, dword ptr [esp+4]      ;
-    fldcw   word ptr arg1           ; reload original FPCW value
-
-    add esp, 8                      ; restore stack
-
-    ret	8
-JIT_Dbl2LngP4x87 ENDP
-
-;*********************************************************************/
-; JIT_Dbl2LngSSE3
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is faster than the above P4 x87 code for Intel processors
-;   equal or later than Core2 and Atom that have SSE3 support
-;
-.686P
-.XMM
-PUBLIC JIT_Dbl2LngSSE3
-JIT_Dbl2LngSSE3 PROC
-arg1	equ	<[esp+0Ch]>
-
-    sub esp, 8                      ; get some local space
-
-    fld qword ptr arg1              ; fetch arg
-    fisttp qword ptr [esp]          ; convert
-    mov eax, dword ptr [esp]        ; reload FP result
-    mov edx, dword ptr [esp+4]
-
-    add esp, 8                      ; restore stack
-
-    ret	8
-JIT_Dbl2LngSSE3 ENDP
-.586
-
-;*********************************************************************/
-; JIT_Dbl2IntSSE2
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is even faster than the P4 x87 code for Dbl2LongP4x87,
-;   but only returns a 32 bit value (only good for int).
-;
-.686P
-.XMM
-PUBLIC JIT_Dbl2IntSSE2
-JIT_Dbl2IntSSE2 PROC
-	$movsd	xmm0, [esp+4]
-	cvttsd2si eax, xmm0
-	ret 8
-JIT_Dbl2IntSSE2 ENDP
-.586
-
 
 ;*********************************************************************/
 ; This is the small write barrier thunk we use when we know the
@@ -1212,39 +1033,6 @@ JIT_TailCallVSDLeave:
 
 JIT_TailCall ENDP
 
-
-;------------------------------------------------------------------------------
-
-; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
-@JIT_FltRem@8 proc public
-        fld  dword ptr [esp+4]          ; divisor
-        fld  dword ptr [esp+8]          ; dividend
-fremloop:
-        fprem
-        fstsw   ax
-        fwait
-        sahf
-        jp      fremloop        ; Continue while the FPU status bit C2 is set
-        fxch    ; swap, so divisor is on top and result is in st(1)
-        fstp    ST(0)           ; Pop the divisor from the FP stack
-        retn    8               ; Return value is in st(0)
-@JIT_FltRem@8 endp
-
-; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor)
-@JIT_DblRem@16 proc public
-        fld  qword ptr [esp+4]          ; divisor
-        fld  qword ptr [esp+12]         ; dividend
-fremloopd:
-        fprem
-        fstsw   ax
-        fwait
-        sahf
-        jp      fremloopd       ; Continue while the FPU status bit C2 is set
-        fxch    ; swap, so divisor is on top and result is in st(1)
-        fstp    ST(0)           ; Pop the divisor from the FP stack
-        retn    16              ; Return value is in st(0)
-@JIT_DblRem@16 endp
-
 ;------------------------------------------------------------------------------
 
 ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp
index 08360e9ff0c06..9ad2fdf112d95 100644
--- a/src/coreclr/vm/i386/jitinterfacex86.cpp
+++ b/src/coreclr/vm/i386/jitinterfacex86.cpp
@@ -96,25 +96,6 @@ extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj)
 
 #endif // _DEBUG
 
-#ifndef TARGET_UNIX
-
-HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val)
-{
-    FCALL_CONTRACT;
-
-    INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val);
-
-    if (ret != (INT32) ret)
-        goto THROW;
-
-    return (INT32) ret;
-
-THROW:
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-#endif // TARGET_UNIX
-
 
 FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_);
 
@@ -961,32 +942,6 @@ void InitJITHelpers1()
     JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseThreadAllocationContexts() ?
         JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL;
 
-    // Get CPU features and check for SSE2 support.
-    // This code should eventually probably be moved into codeman.cpp,
-    // where we set the cpu feature flags for the JIT based on CPU type and features.
-    int cpuFeatures[4];
-    __cpuid(cpuFeatures, 1);
-
-    DWORD dwCPUFeaturesECX = cpuFeatures[2];
-    DWORD dwCPUFeaturesEDX = cpuFeatures[3];
-
-    //  If bit 26 (SSE2) is set, then we can use the SSE2 flavors
-    //  and faster x87 implementation for the P4 of Dbl2Lng.
-    if (dwCPUFeaturesEDX & (1<<26))
-    {
-        SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2);
-        if (dwCPUFeaturesECX & 1)  // check SSE3
-        {
-            SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3);
-            SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3);
-	}
-        else
-        {
-            SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87);   // SSE2 only for signed
-            SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87);
-        }
-    }
-
     if (!(TrackAllocationsEnabled()
         || LoggingOn(LF_GCALLOC, LL_INFO10)
 #ifdef _DEBUG
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 1da0211496029..092dc342547bb 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -91,6 +91,32 @@
 
 #include <optsmallperfcritical.h>
 
+#if !defined(HOST_64BIT) && !defined(TARGET_X86) // 64-bit shift helpers, compiled only when neither macro is defined
+/*********************************************************************/
+HCIMPL2_VV(UINT64, JIT_LLsh, UINT64 num, int shift) // left shift of a 64-bit value
+{
+    FCALL_CONTRACT;
+    return num << (shift & 0x3F); // only the low 6 bits of 'shift' are used, so the count stays in [0, 63]
+}
+HCIMPLEND
+
+/*********************************************************************/
+HCIMPL2_VV(INT64, JIT_LRsh, INT64 num, int shift) // right shift of a signed 64-bit value
+{
+    FCALL_CONTRACT;
+    return num >> (shift & 0x3F); // count masked to [0, 63]; operand is signed (INT64)
+}
+HCIMPLEND
+
+/*********************************************************************/
+HCIMPL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift) // right shift of an unsigned 64-bit value
+{
+    FCALL_CONTRACT;
+    return num >> (shift & 0x3F); // count masked to [0, 63]; operand is unsigned, so vacated bits are zero
+}
+HCIMPLEND
+#endif // !HOST_64BIT && !TARGET_X86
+
 //
 // helper macro to multiply two 32-bit uints
 //
@@ -137,117 +163,6 @@ HCIMPL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2)
 HCIMPLEND
 #endif // !TARGET_X86 || TARGET_UNIX
 
-/*********************************************************************/
-HCIMPL2_VV(INT64, JIT_LMulOvf, INT64 val1, INT64 val2)
-{
-    FCALL_CONTRACT;
-
-    // This short-cut does not actually help since the multiplication
-    // of two 32-bit signed ints compiles into the call to a slow helper
-    // if (Is32BitSigned(val1) && Is32BitSigned(val2))
-    //     return (INT64)(INT32)val1 * (INT64)(INT32)val2;
-
-    INDEBUG(INT64 expected = val1 * val2;)
-    INT64 ret;
-
-        // Remember the sign of the result
-    INT32 sign = Hi32Bits(val1) ^ Hi32Bits(val2);
-
-        // Convert to unsigned multiplication
-    if (val1 < 0) val1 = -val1;
-    if (val2 < 0) val2 = -val2;
-
-        // Get the upper 32 bits of the numbers
-    UINT32 val1High = Hi32Bits(val1);
-    UINT32 val2High = Hi32Bits(val2);
-
-    UINT64 valMid;
-
-    if (val1High == 0) {
-        // Compute the 'middle' bits of the long multiplication
-        valMid = Mul32x32To64(val2High, val1);
-    }
-    else {
-        if (val2High != 0)
-            goto ThrowExcep;
-        // Compute the 'middle' bits of the long multiplication
-        valMid = Mul32x32To64(val1High, val2);
-    }
-
-        // See if any bits after bit 32 are set
-    if (Hi32Bits(valMid) != 0)
-        goto ThrowExcep;
-
-    ret = Mul32x32To64(val1, val2) + ShiftToHi32Bits((UINT32)(valMid));
-
-    // check for overflow
-    if (Hi32Bits(ret) < (UINT32)valMid)
-        goto ThrowExcep;
-
-    if (sign >= 0) {
-        // have we spilled into the sign bit?
-        if (ret < 0)
-            goto ThrowExcep;
-    }
-    else {
-        ret = -ret;
-        // have we spilled into the sign bit?
-        if (ret > 0)
-            goto ThrowExcep;
-    }
-    _ASSERTE(ret == expected);
-    return ret;
-
-ThrowExcep:
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-
-/*********************************************************************/
-HCIMPL2_VV(UINT64, JIT_ULMulOvf, UINT64 val1, UINT64 val2)
-{
-    FCALL_CONTRACT;
-
-    INDEBUG(UINT64 expected = val1 * val2;)
-    UINT64 ret;
-
-        // Get the upper 32 bits of the numbers
-    UINT32 val1High = Hi32Bits(val1);
-    UINT32 val2High = Hi32Bits(val2);
-
-    UINT64 valMid;
-
-    if (val1High == 0) {
-        if (val2High == 0)
-            return Mul32x32To64(val1, val2);
-        // Compute the 'middle' bits of the long multiplication
-        valMid = Mul32x32To64(val2High, val1);
-    }
-    else {
-        if (val2High != 0)
-            goto ThrowExcep;
-        // Compute the 'middle' bits of the long multiplication
-        valMid = Mul32x32To64(val1High, val2);
-    }
-
-        // See if any bits after bit 32 are set
-    if (Hi32Bits(valMid) != 0)
-        goto ThrowExcep;
-
-    ret = Mul32x32To64(val1, val2) + ShiftToHi32Bits((UINT32)(valMid));
-
-    // check for overflow
-    if (Hi32Bits(ret) < (UINT32)valMid)
-        goto ThrowExcep;
-
-    _ASSERTE(ret == expected);
-    return ret;
-
-ThrowExcep:
-        FCThrow(kOverflowException);
-    }
-HCIMPLEND
-
 /*********************************************************************/
 HCIMPL2(INT32, JIT_Div, INT32 dividend, INT32 divisor)
 {
@@ -450,32 +365,6 @@ HCIMPL2_VV(UINT64, JIT_ULMod, UINT64 dividend, UINT64 divisor)
 }
 HCIMPLEND
 
-#if !defined(HOST_64BIT) && !defined(TARGET_X86)
-/*********************************************************************/
-HCIMPL2_VV(UINT64, JIT_LLsh, UINT64 num, int shift)
-{
-    FCALL_CONTRACT;
-    return num << (shift & 0x3F);
-}
-HCIMPLEND
-
-/*********************************************************************/
-HCIMPL2_VV(INT64, JIT_LRsh, INT64 num, int shift)
-{
-    FCALL_CONTRACT;
-    return num >> (shift & 0x3F);
-}
-HCIMPLEND
-
-/*********************************************************************/
-HCIMPL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift)
-{
-    FCALL_CONTRACT;
-    return num >> (shift & 0x3F);
-}
-HCIMPLEND
-#endif // !HOST_64BIT && !TARGET_X86
-
 #include <optdefault.h>
 
 
@@ -488,282 +377,22 @@ HCIMPLEND
 #include <optsmallperfcritical.h>
 
 /*********************************************************************/
-//
-HCIMPL1_V(double, JIT_ULng2Dbl, UINT64 val)
-{
-    FCALL_CONTRACT;
-
-    double conv = (double) ((INT64) val);
-    if (conv < 0)
-        conv += (4294967296.0 * 4294967296.0);  // add 2^64
-    _ASSERTE(conv >= 0);
-    return(conv);
-}
-HCIMPLEND
-
-/*********************************************************************/
-// needed for ARM and RyuJIT-x86
+// Converts a signed 64-bit integer to double; needed for ARM and x86
 HCIMPL1_V(double, JIT_Lng2Dbl, INT64 val)
 {
     FCALL_CONTRACT;
-    return double(val);
+    return (double)val;
 }
 HCIMPLEND
 
-//--------------------------------------------------------------------------
-template <class ftype>
-ftype modftype(ftype value, ftype *iptr);
-template <> float modftype(float value, float *iptr) { return modff(value, iptr); }
-template <> double modftype(double value, double *iptr) { return modf(value, iptr); }
-
-// round to nearest, round to even if tied
-template <class ftype>
-ftype BankersRound(ftype value)
-{
-    if (value < 0.0) return -BankersRound <ftype> (-value);
-
-    ftype integerPart;
-    modftype( value, &integerPart );
-
-    // if decimal part is exactly .5
-    if ((value -(integerPart +0.5)) == 0.0)
-    {
-        // round to even
-        if (fmod(ftype(integerPart), ftype(2.0)) == 0.0)
-            return integerPart;
-
-        // Else return the nearest even integer
-        return (ftype)copysign(ceil(fabs(value+0.5)),
-                         value);
-    }
-
-    // Otherwise round to closest
-    return (ftype)copysign(floor(fabs(value)+0.5),
-                     value);
-}
-
-
-/*********************************************************************/
-// round double to nearest int (as double)
-HCIMPL1_V(double, JIT_DoubleRound, double val)
-{
-    FCALL_CONTRACT;
-    return BankersRound(val);
-}
-HCIMPLEND
-
-/*********************************************************************/
-// round float to nearest int (as float)
-HCIMPL1_V(float, JIT_FloatRound, float val)
-{
-    FCALL_CONTRACT;
-    return BankersRound(val);
-}
-HCIMPLEND
-
-/*********************************************************************/
-// Call fast Dbl2Lng conversion - used by functions below
-FORCEINLINE INT64 FastDbl2Lng(double val)
-{
-#ifdef TARGET_X86
-    FCALL_CONTRACT;
-    return HCCALL1_V(JIT_Dbl2Lng, val);
-#else
-    FCALL_CONTRACT;
-    return((__int64) val);
-#endif
-}
-
-/*********************************************************************/
-HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val)
-{
-    FCALL_CONTRACT;
-
-        // Note that this expression also works properly for val = NaN case
-    if (val > -1.0 && val < 4294967296.0)
-        return((UINT32)FastDbl2Lng(val));
-
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-
-/*********************************************************************/
-HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
-{
-    FCALL_CONTRACT;
-
-    const double two63  = 2147483648.0 * 4294967296.0;
-    UINT64 ret;
-    if (val < two63) {
-        ret = FastDbl2Lng(val);
-    }
-    else {
-        // subtract 0x8000000000000000, do the convert then add it back again
-        ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
-    }
-    return ret;
-}
-HCIMPLEND
-
-/*********************************************************************/
-HCIMPL1_V(UINT64, JIT_Dbl2ULngOvf, double val)
-{
-    FCALL_CONTRACT;
-
-    const double two64  = 4294967296.0 * 4294967296.0;
-        // Note that this expression also works properly for val = NaN case
-    if (val > -1.0 && val < two64) {
-        const double two63  = 2147483648.0 * 4294967296.0;
-        UINT64 ret;
-        if (val < two63) {
-            ret = FastDbl2Lng(val);
-        }
-        else {
-            // subtract 0x8000000000000000, do the convert then add it back again
-            ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
-        }
-#ifdef _DEBUG
-        // since no overflow can occur, the value always has to be within 1
-        double roundTripVal = HCCALL1_V(JIT_ULng2Dbl, ret);
-        _ASSERTE(val - 1.0 <= roundTripVal && roundTripVal <= val + 1.0);
-#endif // _DEBUG
-        return ret;
-    }
-
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-
-
-#if !defined(TARGET_X86) || defined(TARGET_UNIX)
-
 HCIMPL1_V(INT64, JIT_Dbl2Lng, double val)
 {
     FCALL_CONTRACT;
 
-    return((INT64)val);
+    return (INT64)val;
 }
 HCIMPLEND
 
-HCIMPL1_V(int, JIT_Dbl2IntOvf, double val)
-{
-    FCALL_CONTRACT;
-
-    const double two31 = 2147483648.0;
-
-        // Note that this expression also works properly for val = NaN case
-    if (val > -two31 - 1 && val < two31)
-        return((INT32)val);
-
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-
-HCIMPL1_V(INT64, JIT_Dbl2LngOvf, double val)
-{
-    FCALL_CONTRACT;
-
-    const double two63  = 2147483648.0 * 4294967296.0;
-
-    // Note that this expression also works properly for val = NaN case
-    // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-    if (val > -two63 - 0x402 && val < two63)
-        return((INT64)val);
-
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-
-#ifndef TARGET_WINDOWS
-namespace
-{
-    bool isnan(float val)
-    {
-        UINT32 bits = *reinterpret_cast<UINT32*>(&val);
-        return (bits & 0x7FFFFFFFU) > 0x7F800000U;
-    }
-    bool isnan(double val)
-    {
-        UINT64 bits = *reinterpret_cast<UINT64*>(&val);
-        return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL;
-    }
-    bool isfinite(float val)
-    {
-        UINT32 bits = *reinterpret_cast<UINT32*>(&val);
-        return (~bits & 0x7F800000U) != 0;
-    }
-    bool isfinite(double val)
-    {
-        UINT64 bits = *reinterpret_cast<UINT64*>(&val);
-        return (~bits & 0x7FF0000000000000ULL) != 0;
-    }
-}
-#endif
-
-HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
-{
-    FCALL_CONTRACT;
-
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-
-    if (divisor==0 || !isfinite(dividend))
-    {
-        UINT32 NaN = CLR_NAN_32;
-        return *(float *)(&NaN);
-    }
-    else if (!isfinite(divisor) && !isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-#if 0
-    // COMPILER BUG WITH FMODF() + /Oi, USE FMOD() INSTEAD
-    return fmodf(dividend,divisor);
-#else
-    return (float)fmod((double)dividend,(double)divisor);
-#endif
-}
-HCIMPLEND
-
-HCIMPL2_VV(double, JIT_DblRem, double dividend, double divisor)
-{
-    FCALL_CONTRACT;
-
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-    if (divisor==0 || !isfinite(dividend))
-    {
-        UINT64 NaN = CLR_NAN_64;
-        return *(double *)(&NaN);
-    }
-    else if (!isfinite(divisor) && !isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return(fmod(dividend,divisor));
-}
-HCIMPLEND
-
-#endif // !TARGET_X86 || TARGET_UNIX
-
 #include <optdefault.h>
 
 
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index 5e6b0cbeeafdd..dbf4aa43de507 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -10692,7 +10692,17 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMZERO ||
-            dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY)
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_LMUL_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULMUL_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULNG2DBL ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2LNG_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG_OVF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBLREM ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_FLTREM)
         {
             Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper);
             _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP);
@@ -10722,12 +10732,15 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
                     {
                         // Cache it for future uses to avoid taking the lock again.
                         hlpFinalTierAddrTable[dynamicFtnNum] = finalTierAddr;
+                        EE_TO_JIT_TRANSITION_LEAF();
                         return finalTierAddr;
                     }
                 }
             }
 
             *ppIndirection = ((FixupPrecode*)pPrecode)->GetTargetSlot();
+
+            EE_TO_JIT_TRANSITION_LEAF();
             return NULL;
         }
 
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index bbca5c355fbb9..fe11b7c93e9ba 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -325,17 +325,6 @@ EXTERN_C FCDECL2(Object*, JIT_NewArr1OBJ_MP_InlineGetThread, CORINFO_CLASS_HANDL
 
 EXTERN_C FCDECL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2);
 
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2Lng, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2IntSSE2, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngP4x87, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngSSE3, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngOvf, double val);
-
-EXTERN_C FCDECL1_V(INT32, JIT_Dbl2IntOvf, double val);
-
-EXTERN_C FCDECL2_VV(float, JIT_FltRem, float dividend, float divisor);
-EXTERN_C FCDECL2_VV(double, JIT_DblRem, double dividend, double divisor);
-
 #ifndef HOST_64BIT
 #ifdef TARGET_X86
 // JIThelp.asm
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index 60a638198f8f6..34211170f9c3d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1491,5 +1491,251 @@ public static double ScaleB(double x, int n)
             double u = BitConverter.Int64BitsToDouble(((long)(0x3ff + n) << 52));
             return y * u;
         }
+
+        [StackTraceHidden]
+        private static long LongMultiplyOverflow(long i, long j) // Overflow-checked signed 64-bit multiply: returns the product or takes the Overflow path.
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static uint Hi32Bits(ulong a) // upper 32 bits of a
+            {
+                return (uint)(a >> 32);
+            }
+
+#if DEBUG
+            long result = i * j; // reference product, compared against ret in the DEBUG-only assert below
+#endif
+
+            // Remember the sign of the result: the top bit of hi(i) ^ hi(j) is set only when exactly one operand is negative
+            int sign = (int)(Hi32Bits((ulong)i) ^ Hi32Bits((ulong)j));
+
+            // Convert to unsigned multiplication by working on the magnitudes
+            if (i < 0) i = -i;
+            if (j < 0) j = -j;
+
+            // Get the upper 32 bits of the (now non-negated) operands
+            uint val1High = Hi32Bits((ulong)i);
+            uint val2High = Hi32Bits((ulong)j);
+
+            ulong valMid;
+
+            if (val1High == 0)
+            {
+                // i fits in 32 bits, so the only 'middle' (cross) term is hi(j) * lo(i)
+                valMid = BigMul(val2High, (uint)i);
+            }
+            else
+            {
+                if (val2High != 0)
+                    goto Overflow; // both magnitudes are >= 2^32, so the product is >= 2^64
+                // j fits in 32 bits, so the only 'middle' (cross) term is hi(i) * lo(j)
+                valMid = BigMul(val1High, (uint)j);
+            }
+
+            // The cross term is shifted left by 32 below, so any bits above bit 31 would be lost
+            if (Hi32Bits(valMid) != 0)
+                goto Overflow;
+
+            long ret = (long)(BigMul((uint)i, (uint)j) + (valMid << 32)); // lo(i) * lo(j) plus the shifted cross term
+
+            // check for overflow: if the addition wrapped, the upper half of the sum drops below the cross term that was added into it
+            if (Hi32Bits((ulong)ret) < (uint)valMid)
+                goto Overflow;
+
+            if (sign >= 0)
+            {
+                // a non-negative result must not have spilled into the sign bit
+                if (ret < 0)
+                    goto Overflow;
+            }
+            else
+            {
+                ret = -ret;
+                // a negative result that reads as positive after negation means the magnitude spilled into the sign bit
+                if (ret > 0)
+                    goto Overflow;
+            }
+
+#if DEBUG
+            Debug.Assert(ret == result, $"Multiply overflow got: {ret}, expected: {result}");
+#endif
+            return ret;
+
+        Overflow:
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        [StackTraceHidden]
+        private static ulong ULongMultiplyOverflow(ulong i, ulong j) // Overflow-checked unsigned 64-bit multiply: returns the product or takes the Overflow path.
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static uint Hi32Bits(ulong a) // upper 32 bits of a
+            {
+                return (uint)(a >> 32);
+            }
+
+            // Get the upper 32 bits of both operands
+            uint val1High = Hi32Bits(i);
+            uint val2High = Hi32Bits(j);
+
+            ulong valMid;
+
+            if (val1High == 0)
+            {
+                if (val2High == 0)
+                    return BigMul((uint)i, (uint)j); // fast path: both operands fit in 32 bits
+                // i fits in 32 bits, so the only 'middle' (cross) term is hi(j) * lo(i)
+                valMid = BigMul(val2High, (uint)i);
+            }
+            else
+            {
+                if (val2High != 0)
+                    goto Overflow; // both operands are >= 2^32, so the product is >= 2^64
+                // j fits in 32 bits, so the only 'middle' (cross) term is hi(i) * lo(j)
+                valMid = BigMul(val1High, (uint)j);
+            }
+
+            // The cross term is shifted left by 32 below, so any bits above bit 31 would be lost
+            if (Hi32Bits(valMid) != 0)
+                goto Overflow;
+
+            ulong ret = BigMul((uint)i, (uint)j) + (valMid << 32); // lo(i) * lo(j) plus the shifted cross term
+
+            // check for overflow: if the addition wrapped, the upper half of the sum drops below the cross term that was added into it
+            if (Hi32Bits(ret) < (uint)valMid)
+                goto Overflow;
+
+            Debug.Assert(ret == i * j, $"Multiply overflow got: {ret}, expected: {i * j}");
+            return ret;
+
+        Overflow:
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        private static double ULongToDouble(ulong val) // Converts an unsigned 64-bit integer to double by way of a signed conversion.
+        {
+            double conv = (long)val; // signed conversion: inputs with the top bit set come out negative
+            if (conv < 0)
+                conv += 4294967296.0 * 4294967296.0;  // add 2^64 to undo the signed reinterpretation
+            Debug.Assert(conv >= 0);
+            return conv;
+        }
+
+        private static ulong DoubleToULong(double val) // Converts a double to an unsigned 64-bit integer using only signed conversions; no range check is performed here.
+        {
+            const double two63 = 2147483648.0 * 4294967296.0; // 2^31 * 2^32 = 2^63
+            ulong ret;
+            if (val < two63)
+            {
+                ret = (ulong)(long)val; // below 2^63: convert as signed, then reinterpret
+            }
+            else
+            {
+                // subtract 0x8000000000000000 (2^63), do the signed convert, then add it back as an integer
+                ret = (ulong)(long)(val - two63) + 0x8000000000000000UL;
+            }
+            return ret;
+        }
+
+        [StackTraceHidden]
+        private static int DoubleToIntOverflow(double val) // Overflow-checked double -> int conversion.
+        {
+            const double two31 = 2147483648.0; // 2^31
+
+            // Accept the open interval (-2^31 - 1, 2^31); NaN fails both comparisons and falls through to the overflow path
+            if (val is > -two31 - 1 and < two31)
+            {
+                int ret = (int)val;
+                // the range check above rules out overflow, so the converted value must be within 1 of the input
+                Debug.Assert(val - 1.0 <= ret);
+                Debug.Assert(ret <= val + 1.0);
+                return ret;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        [StackTraceHidden]
+        private static uint DoubleToUIntOverflow(double val) // Overflow-checked double -> uint conversion.
+        {
+            // Accept the open interval (-1, 2^32); NaN fails both comparisons and falls through to the overflow path
+            if (val is > -1.0 and < 4294967296.0)
+            {
+                uint ret = (uint)(long)val; // go through long, then narrow to uint
+                // the range check above rules out overflow, so the converted value must be within 1 of the input
+                Debug.Assert(val - 1.0 <= ret);
+                Debug.Assert(ret <= val + 1.0);
+                return ret;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        [StackTraceHidden]
+        private static long DoubleToLongOverflow(double val) // Overflow-checked double -> long conversion.
+        {
+            const double two63 = 2147483648.0 * 4294967296.0; // 2^31 * 2^32 = 2^63
+
+            // NaN fails both comparisons below and falls through to the overflow path
+            // Lower bound: doubles just beyond 2^63 in magnitude are 0x800 apart, so subtracting 0x402 (more than half that gap) rounds to the next double below -2^63, which keeps -2^63 itself in range.
+            if (val is > -two63 - 0x402 and < two63)
+            {
+                long ret = (long)val;
+                // the range check above rules out overflow, so the converted value must be within 1 of the input
+                Debug.Assert(val - 1.0 <= ret);
+                Debug.Assert(ret <= val + 1.0);
+                return ret;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        [StackTraceHidden]
+        private static ulong DoubleToULongOverflow(double val) // Overflow-checked double -> ulong conversion.
+        {
+            const double two64 = 4294967296.0 * 4294967296.0; // 2^32 * 2^32 = 2^64
+            // Accept the open interval (-1, 2^64); NaN fails both comparisons and falls through to the overflow path
+            if (val is > -1.0 and < two64)
+            {
+                ulong ret = (ulong)val;
+                // the range check above rules out overflow, so the converted value must be within 1 of the input
+                Debug.Assert(val - 1.0 <= ret);
+                Debug.Assert(ret <= val + 1.0);
+                return ret;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0; // presumably unreachable (the helper is expected to throw); keeps every code path returning a value
+        }
+
+        private static double DoubleReminder(double dividend, double divisor) // Floating-point remainder with the special cases below. NOTE(review): "Reminder" looks like a typo for "Remainder"; a rename would also have to touch whatever binds this helper by name.
+        {
+            // From the ECMA standard:
+            //
+            // If [divisor] is zero or [dividend] is infinity
+            //   the result is NaN.
+            // If [divisor] is infinity,
+            //   the result is [dividend] (negated for -infinity***).
+            //
+            // ***"negated for -infinity" has been removed from the spec, so [dividend] is returned unchanged
+            if (divisor == 0 || !double.IsFinite(dividend)) // a NaN dividend is not finite either, so it also lands here
+            {
+                return double.NaN;
+            }
+
+            if (!double.IsFinite(divisor) && !double.IsNaN(divisor)) // i.e. divisor is +/-infinity
+            {
+                return dividend;
+            }
+
+            return FMod(dividend, divisor);
+        }
+
+        private static int DoubleToInt(double val) => (int)(long)val; // unchecked-style conversion: converts to long first, then narrows to int
+        private static uint DoubleToUInt(double val) => (uint)(long)val; // unchecked-style conversion: converts to long first, then narrows to uint
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/MathF.cs b/src/libraries/System.Private.CoreLib/src/System/MathF.cs
index cc0795255d0c8..d58e059d26c26 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MathF.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MathF.cs
@@ -511,5 +511,28 @@ public static float ScaleB(float x, int n)
             float u = BitConverter.Int32BitsToSingle(((int)(0x7f + n) << 23));
             return y * u;
         }
+
+        private static float FloatReminder(float dividend, float divisor) // Floating-point remainder with the special cases below. NOTE(review): "Reminder" looks like a typo for "Remainder"; a rename would also have to touch whatever binds this helper by name.
+        {
+            // From the ECMA standard:
+            //
+            // If [divisor] is zero or [dividend] is infinity
+            //   the result is NaN.
+            // If [divisor] is infinity,
+            //   the result is [dividend] (negated for -infinity***).
+            //
+            // ***"negated for -infinity" has been removed from the spec, so [dividend] is returned unchanged
+            if (divisor == 0 || !float.IsFinite(dividend)) // a NaN dividend is not finite either, so it also lands here
+            {
+                return float.NaN;
+            }
+
+            if (!float.IsFinite(divisor) && !float.IsNaN(divisor)) // i.e. divisor is +/-infinity
+            {
+                return dividend;
+            }
+
+            return FMod(dividend, divisor);
+        }
     }
 }

From fe179dffd349a45b4be34d200f4d56931cc11689 Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Thu, 29 Feb 2024 16:02:00 -0800
Subject: [PATCH 02/14] Adding new instructions.

---
 src/coreclr/jit/emit.h        |  6 +++--
 src/coreclr/jit/emitxarch.cpp | 41 ++++++++++++++++++++---------------
 src/coreclr/jit/instr.cpp     | 16 ++++++++++----
 src/coreclr/jit/instrsxarch.h |  9 +++++---
 4 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index e5540a290b4c8..9ce5d038c3c1b 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3999,7 +3999,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comiss:
         case INS_cvtss2sd:
         case INS_cvtss2si:
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_divss:
         case INS_extractps:
         case INS_insertps:
@@ -4042,7 +4043,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comisd:
         case INS_cvtsd2si:
         case INS_cvtsd2ss:
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_divsd:
         case INS_maxsd:
         case INS_minsd:
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 1bafb6796d807..91ece88fac16f 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1522,9 +1522,11 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
         switch (ins)
         {
             case INS_cvtss2si:
-            case INS_cvttss2si:
+            case INS_cvttss2si32:
+            case INS_cvttss2si64:
             case INS_cvtsd2si:
-            case INS_cvttsd2si:
+            case INS_cvttsd2si32:
+            case INS_cvttsd2si64:
             case INS_movd:
             case INS_movnti:
             case INS_andn:
@@ -1544,7 +1546,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
 #endif // TARGET_AMD64
             case INS_vcvtsd2usi:
             case INS_vcvtss2usi:
-            case INS_vcvttsd2usi:
             {
                 if (attr == EA_8BYTE)
                 {
@@ -2723,8 +2724,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
         case INS_blsmsk:
         case INS_blsr:
         case INS_bzhi:
-        case INS_cvttsd2si:
-        case INS_cvttss2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtsd2si:
         case INS_cvtss2si:
         case INS_extractps:
@@ -2748,7 +2751,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
 #endif
         case INS_vcvtsd2usi:
         case INS_vcvtss2usi:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
         case INS_vcvttss2usi64:
         {
@@ -11605,22 +11609,20 @@ void emitter::emitDispIns(
                     break;
                 }
 
-                case INS_cvttsd2si:
+                case INS_cvttsd2si32:
+                case INS_cvttsd2si64:
                 case INS_cvtss2si:
                 case INS_cvtsd2si:
-                case INS_cvttss2si:
+                case INS_cvttss2si32:
+                case INS_cvttss2si64:
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
-                case INS_vcvttsd2usi:
-                {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-                    break;
-                }
-
+                case INS_vcvttsd2usi32:
+                case INS_vcvttsd2usi64:
                 case INS_vcvttss2usi32:
                 case INS_vcvttss2usi64:
                 {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
+                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
                     break;
                 }
 
@@ -19050,7 +19052,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             break;
         }
 
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_cvtsd2si:
         case INS_cvtsi2sd32:
         case INS_cvtsi2ss32:
@@ -19059,7 +19062,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_vcvtsd2usi:
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
@@ -19071,7 +19075,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insLatency += PERFSCORE_LATENCY_5C;
             break;
 
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtss2si:
         case INS_vcvtss2usi:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 3d307ddfe7d96..a68f72cf37a6c 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2378,13 +2378,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             switch (to)
             {
                 case TYP_INT:
-                    return INS_cvttss2si;
+                    return INS_cvttss2si32;
                 case TYP_LONG:
-                    return INS_cvttss2si;
+                    return INS_cvttss2si64;
                 case TYP_FLOAT:
                     return ins_Move_Extend(TYP_FLOAT, false);
                 case TYP_DOUBLE:
                     return INS_cvtss2sd;
+                case TYP_ULONG:
+                    return INS_vcvttss2usi64;
+                case TYP_UINT:
+                    return INS_vcvttss2usi32;
                 default:
                     unreached();
             }
@@ -2394,13 +2398,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             switch (to)
             {
                 case TYP_INT:
-                    return INS_cvttsd2si;
+                    return INS_cvttsd2si32;
                 case TYP_LONG:
-                    return INS_cvttsd2si;
+                    return INS_cvttsd2si64;
                 case TYP_FLOAT:
                     return INS_cvtsd2ss;
                 case TYP_DOUBLE:
                     return ins_Move_Extend(TYP_DOUBLE, false);
+                case TYP_ULONG:
+                    return INS_vcvttsd2usi64;
+                case TYP_UINT:
+                    return INS_vcvttsd2usi32;
                 default:
                     unreached();
             }
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 17443cb978492..8cc89d61cda79 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -201,7 +201,8 @@ INST3(comiss,           "comiss",           IUM_RD, BAD_CODE,     BAD_CODE,
 INST3(cvtsi2ss32,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt DWORD to scalar single
 INST3(cvtsi2ss64,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt QWORD to scalar single
 INST3(cvtss2si,         "cvtss2si",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2D),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt scalar single to DWORD/QWORD
-INST3(cvttss2si,        "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
+INST3(cvttss2si32,      "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
+INST3(cvttss2si64,      "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to QWORD
 INST3(divps,            "divps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5E),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed singles
 INST3(divss,            "divss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar singles
 INST3(maxps,            "maxps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5F),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Maximum packed singles
@@ -260,7 +261,8 @@ INST3(cvtsi2sd64,       "cvtsi2sd",         IUM_WR, BAD_CODE,     BAD_CODE,
 INST3(cvtss2sd,         "cvtss2sd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar single to scalar doubles
 INST3(cvttpd2dq,        "cvttpd2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE6),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed doubles to DWORDs
 INST3(cvttps2dq,        "cvttps2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed singles to DWORDs
-INST3(cvttsd2si,        "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
+INST3(cvttsd2si32,      "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
+INST3(cvttsd2si64,      "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed QWORD
 INST3(divpd,            "divpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5E),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed doubles
 INST3(divsd,            "divsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar doubles
 INST3(lfence,           "lfence",           IUM_RD, 0x000FE8AE,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
@@ -640,7 +642,8 @@ INST3(vcvtsd2usi,       "cvtsd2usi",        IUM_WR, BAD_CODE,               BAD_
 INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
 INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
 INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
-INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD
+INST3(vcvttsd2usi32,    "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD
+INST3(vcvttsd2usi64,    "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned QWORD
 INST3(vcvttss2usi32,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
 INST3(vcvttss2usi64,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
 INST3(vcvtudq2pd,       "cvtudq2pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7A),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to doubles

From 7885cdcb170ffba5c304d2632284e3ae509351d5 Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Thu, 29 Feb 2024 16:10:03 -0800
Subject: [PATCH 03/14] Lowering intrinsics

---
 src/coreclr/jit/assertionprop.cpp      |   1 +
 src/coreclr/jit/gentree.h              |  12 ++
 src/coreclr/jit/hwintrinsiclistxarch.h |  20 +-
 src/coreclr/jit/hwintrinsicxarch.cpp   | 288 +++++++++++++++++++++----
 src/coreclr/jit/lowerxarch.cpp         |   4 +-
 5 files changed, 279 insertions(+), 46 deletions(-)

diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 51a0d5f271e05..010f8715c2774 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -83,6 +83,7 @@ bool IntegralRange::Contains(int64_t value) const
     {
         case TYP_UBYTE:
         case TYP_USHORT:
+        case TYP_UINT:
             return SymbolicIntegerValue::Zero;
         case TYP_BYTE:
             return SymbolicIntegerValue::ByteMin;
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 328860eb1713f..000c134c346e7 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -556,6 +556,8 @@ enum GenTreeFlags : unsigned int
 #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
     GTF_HW_EM_OP                  = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
 #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
+
+    GTF_CONVERSION_SATURATED    = 0x20000000, // GT_CAST -- conversion operation has saturation behavior
 };
 
 inline constexpr GenTreeFlags operator ~(GenTreeFlags a)
@@ -3886,6 +3888,16 @@ struct GenTreeCast : public GenTreeOp
 
         return false;
     }
+
+    bool IsSaturatedConversion() const // Reports whether GTF_CONVERSION_SATURATED is set on this node.
+    {
+        return (gtFlags & GTF_CONVERSION_SATURATED) != 0;
+    }
+
+    void SetSaturatedConversion() // Sets GTF_CONVERSION_SATURATED on this node; other flags are untouched.
+    {
+        gtFlags |= GTF_CONVERSION_SATURATED;
+    }
 };
 
 // GT_BOX nodes are place markers for boxed values.  The "real" tree
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 3093c9ff71a56..660186cefa413 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -273,8 +273,12 @@ HARDWARE_INTRINSIC(Vector512,       Create,
 HARDWARE_INTRINSIC(Vector512,       CreateScalar,                               64,            -1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       CreateScalarUnsafe,                         64,             1,       true,  {INS_movd,              INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movss,              INS_movsd_simd},        HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector512,       CreateSequence,                             64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector512,       ConvertToDouble,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       ConvertToSingle,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       ConvertToInt32,                             64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToInt64,                             64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToUInt32,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToUInt64,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       Divide,                                     64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       Equals,                                     64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       EqualsAll,                                  64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -411,7 +415,7 @@ HARDWARE_INTRINSIC(SSE,             CompareUnordered,
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnordered,                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(SSE,             ConvertToInt32,                             16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss32,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si32,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             Divide,                                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(SSE,             DivideScalar,                               16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             LoadAlignedVector128,                       16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
@@ -460,7 +464,7 @@ HARDWARE_INTRINSIC(SSE,             Xor,
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE 64-bit-only Intrinsics
 HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64,                             16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si64,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE_X64,         ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss64,         INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
@@ -511,7 +515,7 @@ HARDWARE_INTRINSIC(SSE2,            CompareScalarOrdered,
 HARDWARE_INTRINSIC(SSE2,            CompareUnordered,                           16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnordered,                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si32},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Double,                   16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Double,             16,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd32,         INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2sd,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
@@ -578,7 +582,7 @@ HARDWARE_INTRINSIC(SSE2,            Xor,
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 64-bit-only Intrinsics
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si64},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd64,         INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
@@ -708,9 +712,9 @@ HARDWARE_INTRINSIC(AVX,             CompareNotLessThanOrEqual,
 HARDWARE_INTRINSIC(AVX,             CompareOrdered,                             32,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(AVX,             CompareUnordered,                           32,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(AVX,             CompareScalar,                              16,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
-HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Single,                   32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector256Single,                   32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector256Double,                   32,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32WithTruncation,      32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttpd2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -861,7 +865,7 @@ HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Double,
 HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Single,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss32,         INS_vcvtusi2ss32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2ss},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToInt32,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32,                            16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi32,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi32,      INS_vcvttsd2usi32},     HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Byte,                     64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovdb,            INS_vpmovdb,            INS_vpmovqb,            INS_vpmovqb,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128ByteWithSaturation,       64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusdb,          INS_invalid,            INS_vpmovusqb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Int16,                    64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqw,            INS_vpmovqw,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
@@ -1027,7 +1031,7 @@ HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Double,
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Single,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss64,         INS_vcvtusi2ss64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToInt64,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64,                            16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi64,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi64,      INS_vcvttsd2usi64},     HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg  EncodesExtraTypeArg                                                                                                       Instructions                                                                                                                  Category                            Flags
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index f88cf6ec99ec3..7f6469f992916 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1415,15 +1415,190 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
         case NI_Vector128_ConvertToDouble:
         case NI_Vector256_ConvertToDouble:
+        case NI_Vector512_ConvertToDouble:
+        {
+            assert(sig->numArgs == 1);
+            assert(varTypeIsLong(simdBaseType) || simdBaseType == TYP_FLOAT);
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                if (varTypeIsLong(simdBaseType))
+                {
+                    intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Double
+                                                 : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Double
+                                                                    : NI_AVX512DQ_ConvertToVector512Double;
+                }
+                else
+                {
+                    intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Double
+                                                 : (simdSize == 32) ? NI_AVX_ConvertToVector256Double
+                                                                    : NI_AVX512F_ConvertToVector512Double;
+                }
+
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+            }
+            break;
+        }
+
         case NI_Vector128_ConvertToInt64:
         case NI_Vector256_ConvertToInt64:
+        case NI_Vector512_ConvertToInt64:
+        {
+            assert(sig->numArgs == 1);
+            assert(simdBaseType == TYP_DOUBLE);
+#ifdef TARGET_AMD64
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1 = impSIMDPopStack();
+
+                var_types simdType = getSIMDTypeForSize(simdSize);
+                // Generate the control table for VFIXUPIMMSD
+                // The behavior we want is to convert NaN to 0; only the NaN entries of the table are set.
+                GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                // QNAN: 0b1000: Saturate to Zero
+                // SNAN: 0b1000: Saturate to Zero
+                // ZERO: 0b0000
+                // +ONE: 0b0000
+                // -INF: 0b0000
+                // +INF: 0b0000
+                // -VAL: 0b0000
+                // +VAL: 0b0000
+                for (int i = 0; i < 8; i++)
+                {
+                    tbl->gtSimdVal.i64[i] = 0x00000088;
+                }
+
+                // Generate first operand
+                // The logic is that first and second operand are basically the same because we want
+                // the output to be in the same xmm register
+                // Hence we clone the first operand
+                GenTree* op2Clone = fgMakeMultiUse(&op1);
+
+                // run the fixup based on the control table, with no flags reporting
+                GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                 NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                GenTree* max_val =
+                    gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast<double>(INT64_MAX)),
+                                                 simdBaseJitType, simdSize);
+                GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG),
+                                                                   CORINFO_TYPE_LONG, simdSize);
+                // we will be using the input value twice
+                GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
+
+                // usage 1 --> compare with max value of integer
+                saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
+                // cast it
+
+                intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
+
+                // usage 2 --> use the compared mask with input value and max value to blend
+                retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, simdSize);
+            }
+#endif // TARGET_AMD64
+            break;
+        }
+
         case NI_Vector128_ConvertToUInt32:
         case NI_Vector256_ConvertToUInt32:
+        case NI_Vector512_ConvertToUInt32:
+        {
+            assert(sig->numArgs == 1);
+            assert(varTypeIsFloating(simdBaseType));
+#ifdef TARGET_AMD64
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1 = impSIMDPopStack();
+
+                var_types simdType = getSIMDTypeForSize(simdSize);
+                // Generate the control table for VFIXUPIMMSD
+                // The behavior we want is to saturate negative values to 0.
+                GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                // QNAN: 0b1000:
+                // SNAN: 0b1000
+                // ZERO: 0b0000:
+                // +ONE: 0b0000
+                // -INF: 0b0000
+                // +INF: 0b0000
+                // -VAL: 0b1000: Saturate to Zero
+                // +VAL: 0b0000
+                for (int i = 0; i < 16; i++)
+                {
+                    tbl->gtSimdVal.i32[i] = 0x08000088;
+                }
+
+                // Generate first operand
+                // The logic is that first and second operand are basically the same because we want
+                // the output to be in the same xmm register
+                // Hence we clone the first operand
+                GenTree* op2Clone = fgMakeMultiUse(&op1);
+
+                // run the fixup based on the control table, with no flags reporting
+                GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                             NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+            }
+#endif // TARGET_AMD64
+            break;
+        }
+
         case NI_Vector128_ConvertToUInt64:
         case NI_Vector256_ConvertToUInt64:
+        case NI_Vector512_ConvertToUInt64:
         {
             assert(sig->numArgs == 1);
-            // TODO-XARCH-CQ: These intrinsics should be accelerated
+            assert(simdBaseType == TYP_DOUBLE);
+#ifdef TARGET_AMD64
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1 = impSIMDPopStack();
+
+                var_types simdType = getSIMDTypeForSize(simdSize);
+                // Generate the control table for VFIXUPIMMSD
+                // The behavior we want is to saturate negative values to 0.
+                GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                // QNAN: 0b1000:
+                // SNAN: 0b1000
+                // ZERO: 0b0000:
+                // +ONE: 0b0000
+                // -INF: 0b0000
+                // +INF: 0b0000
+                // -VAL: 0b1000: Saturate to Zero
+                // +VAL: 0b0000
+                for (int i = 0; i < 8; i++)
+                {
+                    tbl->gtSimdVal.i64[i] = 0x08000088;
+                }
+
+                // Generate first operand
+                // The logic is that first and second operand are basically the same because we want
+                // the output to be in the same xmm register
+                // Hence we clone the first operand
+                GenTree* op2Clone = fgMakeMultiUse(&op1);
+
+                // run the fixup based on the control table, with no flags reporting
+                GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                             NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+            }
+#endif // TARGET_AMD64
             break;
         }
 
@@ -1433,24 +1608,63 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_FLOAT);
-
-            switch (simdSize)
+#ifdef TARGET_AMD64
+            if (IsBaselineVector512IsaSupportedOpportunistically())
             {
-                case 16:
-                    intrinsic = NI_SSE2_ConvertToVector128Int32WithTruncation;
-                    break;
-                case 32:
-                    intrinsic = NI_AVX_ConvertToVector256Int32WithTruncation;
-                    break;
-                case 64:
-                    intrinsic = NI_AVX512F_ConvertToVector512Int32WithTruncation;
-                    break;
-                default:
-                    unreached();
-            }
+                op1 = impSIMDPopStack();
 
-            op1     = impSIMDPopStack();
-            retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+                var_types simdType = getSIMDTypeForSize(simdSize);
+                // Generate the control table for VFIXUPIMMSD
+                // The behavior we want is to convert NaN to 0; only the NaN entries of the table are set.
+                GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                // QNAN: 0b1000: Saturate to Zero
+                // SNAN: 0b1000: Saturate to Zero
+                // ZERO: 0b0000
+                // +ONE: 0b0000
+                // -INF: 0b0000
+                // +INF: 0b0000
+                // -VAL: 0b0000
+                // +VAL: 0b0000
+                for (int i = 0; i < 16; i++)
+                {
+                    tbl->gtSimdVal.i32[i] = 0x00000088;
+                }
+
+                // Generate first operand
+                // The logic is that first and second operand are basically the same because we want
+                // the output to be in the same xmm register
+                // Hence we clone the first operand
+                GenTree* op2Clone = fgMakeMultiUse(&op1);
+                // GenTree* op2Clone;
+                // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
+                //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
+
+                // run the fixup based on the control table, with no flags reporting
+                GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                 NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT32_MAX)),
+                                                                simdBaseJitType, simdSize);
+                GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT),
+                                                                   CORINFO_TYPE_INT, simdSize);
+                // we will be using the input value twice
+                GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
+
+                // usage 1 --> compare with max value of integer
+                saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
+                // cast it
+
+                intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
+                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
+
+                // usage 2 --> use the compared mask with input value and max value to blend
+                retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, simdSize);
+            }
+#endif // TARGET_AMD64
             break;
         }
 
@@ -1459,31 +1673,33 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector512_ConvertToSingle:
         {
             assert(sig->numArgs == 1);
-
+            assert(varTypeIsInt(simdBaseType));
+            intrinsic = NI_Illegal;
             if (simdBaseType == TYP_INT)
             {
-                switch (simdSize)
+                if (simdSize == 16)
                 {
-                    case 16:
-                        intrinsic = NI_SSE2_ConvertToVector128Single;
-                        break;
-                    case 32:
-                        intrinsic = NI_AVX_ConvertToVector256Single;
-                        break;
-                    case 64:
-                        intrinsic = NI_AVX512F_ConvertToVector512Single;
-                        break;
-                    default:
-                        unreached();
+                    intrinsic = NI_SSE2_ConvertToVector128Single;
+                }
+                else if (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX))
+                {
+                    intrinsic = NI_AVX_ConvertToVector256Single;
+                }
+                else if (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically())
+                {
+                    intrinsic = NI_AVX512F_ConvertToVector512Single;
                 }
-
-                op1     = impSIMDPopStack();
-                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             }
-            else
+            else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single
+                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
+                                                                : NI_AVX512F_ConvertToVector512Single;
+            }
+            if (intrinsic != NI_Illegal)
             {
-                // TODO-XARCH-CQ: These intrinsics should be accelerated
-                assert(simdBaseType == TYP_UINT);
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             }
             break;
         }
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 811657c9a5219..ed1eccc2144af 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -843,7 +843,7 @@ void Lowering::LowerCast(GenTree* tree)
     if (varTypeIsFloating(srcType))
     {
         noway_assert(!tree->gtOverflow());
-        noway_assert(castToType != TYP_ULONG);
+        assert(castToType != TYP_ULONG || comp->IsBaselineVector512IsaSupportedDebugOnly());
     }
     else if (srcType == TYP_UINT)
     {
@@ -851,7 +851,7 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else if (srcType == TYP_ULONG)
     {
-        assert(castToType != TYP_FLOAT || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        assert(castToType != TYP_FLOAT || comp->IsBaselineVector512IsaSupportedDebugOnly());
     }
 
     // Case of src is a small type and dst is a floating point type.

From 064fdb73176b61dfe2b36724aa916245de33ae9c Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Thu, 29 Feb 2024 18:29:44 -0800
Subject: [PATCH 04/14] Handling behaviour + fixing R2R etc

# Conflicts:
#	src/coreclr/inc/jithelpers.h
#	src/coreclr/jit/morph.cpp
#	src/coreclr/jit/utils.cpp
#	src/coreclr/jit/valuenum.cpp
#	src/coreclr/nativeaot/Runtime/MathHelpers.cpp
#	src/coreclr/vm/jithelpers.cpp
---
 docs/design/coreclr/botr/readytorun-format.md |   1 +
 src/coreclr/inc/corinfo.h                     |   1 +
 src/coreclr/inc/jithelpers.h                  |   1 +
 src/coreclr/inc/readytorun.h                  |   1 +
 src/coreclr/inc/readytorunhelpers.h           |   1 +
 src/coreclr/jit/codegenxarch.cpp              |  11 +-
 src/coreclr/jit/morph.cpp                     | 167 +++++++++-
 src/coreclr/jit/simdashwintrinsic.cpp         | 313 ++++++++++++++++--
 src/coreclr/jit/utils.cpp                     |   1 +
 src/coreclr/jit/valuenum.cpp                  |   6 +
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp |  47 +++
 .../Internal/Runtime/ReadyToRunConstants.cs   |   1 +
 .../Common/JitInterface/CorInfoHelpFunc.cs    |   1 +
 .../ILCompiler.Compiler/Compiler/JitHelper.cs |   3 +
 .../JitInterface/CorInfoImpl.ReadyToRun.cs    |   3 +
 .../ReadyToRunSignature.cs                    |   4 +
 .../JitInterface/CorInfoImpl.RyuJit.cs        |   3 +
 17 files changed, 517 insertions(+), 48 deletions(-)

diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md
index a9a5c8b916303..c222ab299e54b 100644
--- a/docs/design/coreclr/botr/readytorun-format.md
+++ b/docs/design/coreclr/botr/readytorun-format.md
@@ -870,6 +870,7 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_Dbl2UIntOvf               = 0xD5,
     READYTORUN_HELPER_Dbl2ULng                  = 0xD6,
     READYTORUN_HELPER_Dbl2ULngOvf               = 0xD7,
+    READYTORUN_HELPER_Flt2UInt                  = 0xD8,
 
     // Floating point ops
     READYTORUN_HELPER_DblRem                    = 0xE0,
diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index b4417e60af4d5..a935dd2e53f2a 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -393,6 +393,7 @@ enum CorInfoHelpFunc
     CORINFO_HELP_DBL2LNG_OVF,
     CORINFO_HELP_DBL2UINT,              // unused
     CORINFO_HELP_DBL2UINT_OVF,
+    CORINFO_HELP_FLT2UINT,
     CORINFO_HELP_DBL2ULNG,
     CORINFO_HELP_DBL2ULNG_OVF,
     CORINFO_HELP_FLTREM,
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index e0f65ff5de908..8cb878a14a210 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -59,6 +59,7 @@
     DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF,  NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_FLT2UINT,     NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG,     NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_FLTREM,       NULL,               CORINFO_HELP_SIG_8_STACK)
diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index 41a4aa251fa74..0c84b7f3baef0 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -399,6 +399,7 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_Dbl2UIntOvf               = 0xD5,
     READYTORUN_HELPER_Dbl2ULng                  = 0xD6,
     READYTORUN_HELPER_Dbl2ULngOvf               = 0xD7,
+    READYTORUN_HELPER_Flt2UInt                  = 0xD8,
 
     // Floating point ops
     READYTORUN_HELPER_DblRem                    = 0xE0,
diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h
index bbb586e8eb4a3..695d9d886e261 100644
--- a/src/coreclr/inc/readytorunhelpers.h
+++ b/src/coreclr/inc/readytorunhelpers.h
@@ -84,6 +84,7 @@ HELPER(READYTORUN_HELPER_Dbl2Lng,                   CORINFO_HELP_DBL2LNG,
 HELPER(READYTORUN_HELPER_Dbl2LngOvf,                CORINFO_HELP_DBL2LNG_OVF,                       )
 HELPER(READYTORUN_HELPER_Dbl2UInt,                  CORINFO_HELP_DBL2UINT,                          )
 HELPER(READYTORUN_HELPER_Dbl2UIntOvf,               CORINFO_HELP_DBL2UINT_OVF,                      )
+HELPER(READYTORUN_HELPER_Flt2UInt,                  CORINFO_HELP_FLT2UINT,                          )
 HELPER(READYTORUN_HELPER_Dbl2ULng,                  CORINFO_HELP_DBL2ULNG,                          )
 HELPER(READYTORUN_HELPER_Dbl2ULngOvf,               CORINFO_HELP_DBL2ULNG_OVF,                      )
 
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 223199f35c327..4370a7cf5fc4d 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7641,13 +7641,16 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
     // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase.
-    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+    // into a helper call by either front-end or lowering phase, unless we have AVX512F
+    // accelerated conversions.
+    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
+           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
     // To achieve it we pretend as if we are converting it to a long.
-    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
+        !compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
         dstType = TYP_LONG;
     }
@@ -7655,7 +7658,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
     genConsumeOperands(treeNode->AsOp());
-    instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
+    instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
     GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
     genProduceReg(treeNode);
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 0e7edf4f15902..4b58a8b6899e8 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -323,6 +323,139 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
             }
         }
     }
+
+    // TODO: tighten this check so that the only casts it skips are those that
+    // have already been fixed up (i.e. marked as saturated conversions).
+    do
+    {
+        if (!tree->gtOverflow() && varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType))
+        {
+            if ((dstType == TYP_LONG) && (srcType == TYP_FLOAT))
+            {
+                oper    = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
+                srcType = TYP_DOUBLE;
+            }
+            if (tree->IsSaturatedConversion())
+            {
+                break;
+            }
+            CorInfoType fieldType = (srcType == TYP_DOUBLE) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
+
+            if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+            {
+                if (varTypeIsUnsigned(dstType))
+                {
+                    // Generate the control table for VFIXUPIMMSD
+                    // The behavior we want is to saturate negative values to 0.
+                    GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16);
+
+                    // QNAN: 0b1000:
+                    // SNAN: 0b1000
+                    // ZERO: 0b0000:
+                    // +ONE: 0b0000
+                    // -INF: 0b0000
+                    // +INF: 0b0000
+                    // -VAL: 0b1000: Saturate to Zero
+                    // +VAL: 0b0000
+                    tbl->gtSimdVal.i32[0] = 0x08000088;
+
+                    // Generate first operand
+                    // The logic is that first and second operand are basically the same because we want
+                    // the output to be in the same xmm register
+                    // Hence we clone the first operand
+                    GenTree* op2Clone = fgMakeMultiUse(&oper);
+
+                    // run the scalar fixup based on the control table, with no flags reporting
+                    GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, oper, op2Clone, tbl, gtNewIconNode(0),
+                                                                NI_AVX512F_FixupScalar, fieldType, 16);
+
+                    // Convert to scalar
+                    // Here, we try to insert a Vector128 to Scalar node so that the input
+                    // can be provided to the scalar cast
+                    GenTree* retNode1 =
+                        gtNewSimdHWIntrinsicNode(srcType, retNode, NI_Vector128_ToScalar, fieldType, 16);
+                    tree = gtNewCastNode(genActualType(dstType), retNode1, false, dstType);
+                    tree->SetSaturatedConversion();
+                    return fgMorphTree(tree);
+                }
+                else
+                {
+                    CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG;
+
+                    ssize_t actualMaxVal = (dstType == TYP_INT) ? INT32_MAX : INT64_MAX;
+
+                    // CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG;
+                    // Generate the control table for VFIXUPIMMSD
+                    // The behavior we want is to convert NaN to 0; only the NaN entries of the table are set.
+                    GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16);
+
+                    // QNAN: 0b1000:
+                    // SNAN: 0b1000
+                    // ZERO: 0b0000:
+                    // +ONE: 0b0000
+                    // -INF: 0b0000
+                    // +INF: 0b0000
+                    // -VAL: 0b0000
+                    // +VAL: 0b0000
+                    tbl->gtSimdVal.i32[0] = 0x00000088;
+
+                    // Generate first operand
+                    // The logic is that first and second operand are basically the same because we want
+                    // the output to be in the same xmm register
+                    // Hence we clone the first operand
+                    GenTree* op2Clone = fgMakeMultiUse(&oper);
+
+                    // run the scalar fixup based on the control table, with no flags reporting
+                    oper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, oper, op2Clone, tbl, gtNewIconNode(0),
+                                                    NI_AVX512F_FixupScalar, fieldType, 16);
+
+                    GenTree* saturate_val = oper;
+
+                    // get the max value vector
+
+                    GenTree* max_val = (srcType == TYP_DOUBLE) ? gtNewDconNodeD(static_cast<double>(actualMaxVal))
+                                                               : gtNewDconNodeF(static_cast<float>(actualMaxVal));
+                    GenTree* max_valDup =
+                        (dstType == TYP_INT) ? gtNewIconNode(actualMaxVal, dstType) : gtNewLconNode(actualMaxVal);
+                    max_val    = gtNewSimdCreateBroadcastNode(TYP_SIMD16, max_val, fieldType, 16);
+                    max_valDup = gtNewSimdCreateBroadcastNode(TYP_SIMD16, max_valDup, destFieldType, 16);
+
+                    // we will be using the input value twice
+                    GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
+
+                    // usage 1 --> compare with max value of integer
+                    saturate_val = gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, saturate_val, max_val, fieldType, 16);
+                    GenTree* retNode1 =
+                        gtNewSimdHWIntrinsicNode(srcType, saturate_valDup, NI_Vector128_ToScalar, fieldType, 16);
+                    // cast it
+                    tree = gtNewCastNode(dstType, retNode1, false, dstType);
+                    tree->SetSaturatedConversion();
+                    GenTree* tree1 = gtNewSimdCreateBroadcastNode(TYP_SIMD16, tree, destFieldType, 16);
+
+                    // usage 2 --> use the compared mask with input value and max value to blend
+                    // GenTree* dummy = gtNewSimdCreateBroadcastNode(TYP_SIMD16, gtNewLconNode(2), destFieldType, 16);
+                    saturate_val = gtNewSimdCndSelNode(TYP_SIMD16, saturate_val, max_valDup, tree1, destFieldType, 16);
+                    saturate_val =
+                        gtNewSimdHWIntrinsicNode(dstType, saturate_val, NI_Vector128_ToScalar, destFieldType, 16);
+                    return fgMorphTree(saturate_val);
+                }
+            }
+            // does not work, need to convert into helper function
+            else if (srcType == TYP_FLOAT && dstType == TYP_UINT)
+            {
+                return fgMorphCastIntoHelper(tree, CORINFO_HELP_FLT2UINT, oper);
+            }
+            else if (srcType == TYP_DOUBLE && dstType == TYP_UINT)
+            {
+                return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
+            }
+            else if (srcType == TYP_DOUBLE && dstType == TYP_INT)
+            {
+                return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+            }
+        }
+    } while (false);
+
 #endif // TARGET_AMD64
 
     // See if the cast has to be done in two steps.  R -> I
@@ -336,7 +469,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #elif defined(TARGET_AMD64)
             // Amd64: src = float, dst = uint64 or overflow conversion.
             // This goes through helper and hence src needs to be converted to double.
-            && (tree->gtOverflow() || (dstType == TYP_ULONG))
+            && (tree->gtOverflow() || ((dstType == TYP_INT || dstType == TYP_ULONG || dstType == TYP_LONG) &&
+                                       !compOpportunisticallyDependsOn(InstructionSet_AVX512F)))
 #elif defined(TARGET_ARM)
             // Arm: src = float, dst = int64/uint64 or overflow conversion.
             && (tree->gtOverflow() || varTypeIsLong(dstType))
@@ -371,26 +505,43 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                 switch (dstType)
                 {
                     case TYP_INT:
+#ifdef TARGET_XARCH
+                        if (!tree->IsSaturatedConversion())
+                        {
+                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+                        }
+#endif // TARGET_XARCH
                         return nullptr;
 
                     case TYP_UINT:
-#if defined(TARGET_ARM) || defined(TARGET_AMD64)
+#if defined(TARGET_ARM)
                         return nullptr;
 #else  // TARGET_X86
-                        oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
+                        if (tree->IsSaturatedConversion())
+                        {
+                            return nullptr;
+                        }
+                        /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
                         tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT);
-                        return fgMorphTree(tree);
+                        return fgMorphTree(tree);*/
+                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
 #endif // TARGET_X86
 
                     case TYP_LONG:
-#ifdef TARGET_AMD64
-                        // SSE2 has instructions to convert a float/double directly to a long
+#ifdef TARGET_XARCH
+                        if (!tree->IsSaturatedConversion())
+                        {
+                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
+                        }
                         return nullptr;
-#else  // !TARGET_AMD64
+#endif // TARGET_XARCH
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
-#endif // !TARGET_AMD64
 
                     case TYP_ULONG:
+#ifdef TARGET_AMD64
+                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+                            return nullptr;
+#endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                     default:
                         unreached();
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index f06b38736ddad..d1e157ab8dbef 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -513,23 +513,45 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
     switch (intrinsic)
     {
 #if defined(TARGET_XARCH)
+
         case NI_VectorT_ConvertToDouble:
+        {
+#ifdef TARGET_AMD64
+            if ((varTypeIsLong(simdBaseType) && IsBaselineVector512IsaSupportedOpportunistically()) ||
+                (simdBaseType == TYP_FLOAT && ((simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX)) ||
+                                               (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically()))))
+            {
+                break;
+            }
+#endif // TARGET_AMD64
+            return nullptr;
+        }
+
+        case NI_VectorT_ConvertToInt32:
         case NI_VectorT_ConvertToInt64:
         case NI_VectorT_ConvertToUInt32:
         case NI_VectorT_ConvertToUInt64:
         {
-            // TODO-XARCH-CQ: These intrinsics should be accelerated
+#ifdef TARGET_AMD64
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                break;
+            }
+#endif // TARGET_AMD64
             return nullptr;
         }
 
         case NI_VectorT_ConvertToSingle:
         {
-            if (simdBaseType == TYP_UINT)
+#ifdef TARGET_AMD64
+            if ((simdBaseType == TYP_INT && ((simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX)) ||
+                                             (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically()))) ||
+                (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()))
             {
-                // TODO-XARCH-CQ: These intrinsics should be accelerated
-                return nullptr;
+                break;
             }
-            break;
+#endif // TARGET_AMD64
+            return nullptr;
         }
 #endif // TARGET_XARCH
 
@@ -1154,50 +1176,269 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                 }
 
 #if defined(TARGET_XARCH)
+
+                case NI_VectorT_ConvertToInt64:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(simdBaseType == TYP_DOUBLE);
+#ifdef TARGET_AMD64
+                    if (IsBaselineVector512IsaSupportedOpportunistically())
+                    {
+                        var_types simdType = getSIMDTypeForSize(simdSize);
+                        // Generate the control table for VFIXUPIMMSD
+                        // The behavior we want is to map NaN inputs (QNAN/SNAN) to 0.
+                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                        // QNAN: 0b1000: Saturate to Zero
+                        // SNAN: 0b1000: Saturate to Zero
+                        // ZERO: 0b0000
+                        // +ONE: 0b0000
+                        // -INF: 0b0000
+                        // +INF: 0b0000
+                        // -VAL: 0b0000
+                        // +VAL: 0b0000
+                        for (int i = 0; i < 8; i++)
+                        {
+                            tbl->gtSimdVal.i64[i] = 0x00000088;
+                        }
+
+                        // Generate first operand
+                        // The logic is that first and second operand are basically the same because we want
+                        // the output to be in the same xmm register
+                        // Hence we clone the first operand
+                        GenTree* op2Clone = fgMakeMultiUse(&op1);
+                        // GenTree* op2Clone;
+                        // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
+                        //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
+
+                        // run vfixupimmsd base on table and no flags reporting
+                        GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                         NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                        GenTree* max_val =
+                            gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast<double>(INT64_MAX)),
+                                                         simdBaseJitType, simdSize);
+                        GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG),
+                                                                           CORINFO_TYPE_LONG, simdSize);
+                        // we will be using the input value twice
+                        GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
+
+                        // usage 1 --> compare with max value of integer
+                        saturate_val =
+                            gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
+                        // cast it
+
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+
+                        GenTree* retNode =
+                            gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
+
+                        // usage 2 --> use the compared mask with input value and max value to blend
+                        return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG,
+                                                   simdSize);
+                    }
+#endif // TARGET_AMD64
+                    return nullptr;
+                }
+
+                case NI_VectorT_ConvertToUInt32:
+                {
+                    assert(sig->numArgs == 1);
+                    assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT));
+#ifdef TARGET_AMD64
+                    if (IsBaselineVector512IsaSupportedOpportunistically())
+                    {
+                        var_types simdType = getSIMDTypeForSize(simdSize);
+                        // Generate the control table for VFIXUPIMMSD
+                        // The behavior we want is to saturate negative values to 0.
+                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
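+                        // QNAN: 0b1000: Saturate to Zero
+                        // SNAN: 0b1000: Saturate to Zero
+                        // ZERO: 0b0000
+                        // +ONE: 0b0000
+                        // -INF: 0b0000 (NOTE(review): -INF is not fixed up to zero here -- confirm intended)
+                        // +INF: 0b0000
+                        // -VAL: 0b1000: Saturate to Zero
+                        // +VAL: 0b0000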
+                        for (int i = 0; i < 16; i++)
+                        {
+                            tbl->gtSimdVal.i32[i] = 0x08000088;
+                        }
+
+                        // Generate first operand
+                        // The logic is that first and second operand are basically the same because we want
+                        // the output to be in the same xmm register
+                        // Hence we clone the first operand
+                        GenTree* op2Clone = fgMakeMultiUse(&op1);
+
+                        // run vfixupimmsd base on table and no flags reporting
+                        GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                     NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                        intrinsic = (simdSize == 16)
+                                        ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
+                                        : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                                           : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+
+                        return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                    }
+#endif // TARGET_AMD64
+                    return nullptr;
+                }
+
+                case NI_VectorT_ConvertToUInt64:
+                {
+                    assert(sig->numArgs == 1);
+                    assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT));
+#ifdef TARGET_AMD64
+                    if (IsBaselineVector512IsaSupportedOpportunistically())
+                    {
+                        var_types simdType = getSIMDTypeForSize(simdSize);
+                        // Generate the control table for VFIXUPIMMSD
+                        // The behavior we want is to saturate negative values to 0.
+                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
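+                        // QNAN: 0b1000: Saturate to Zero
+                        // SNAN: 0b1000: Saturate to Zero
+                        // ZERO: 0b0000
+                        // +ONE: 0b0000
+                        // -INF: 0b0000 (NOTE(review): -INF is not fixed up to zero here -- confirm intended)
+                        // +INF: 0b0000
+                        // -VAL: 0b1000: Saturate to Zero
+                        // +VAL: 0b0000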
+                        for (int i = 0; i < 8; i++)
+                        {
+                            tbl->gtSimdVal.i64[i] = 0x08000088;
+                        }
+
+                        // Generate first operand
+                        // The logic is that first and second operand are basically the same because we want
+                        // the output to be in the same xmm register
+                        // Hence we clone the first operand
+                        GenTree* op2Clone = fgMakeMultiUse(&op1);
+
+                        // run vfixupimmsd base on table and no flags reporting
+                        GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                     NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                        intrinsic = (simdSize == 16)
+                                        ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
+                                        : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                                           : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+
+                        return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                    }
+#endif // TARGET_AMD64
+                    return nullptr;
+                }
+
                 case NI_VectorT_ConvertToInt32:
                 {
                     assert(simdBaseType == TYP_FLOAT);
-                    NamedIntrinsic convert;
-
-                    switch (simdSize)
+#ifdef TARGET_AMD64
+                    if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        case 16:
-                            convert = NI_SSE2_ConvertToVector128Int32WithTruncation;
-                            break;
-                        case 32:
-                            convert = NI_AVX_ConvertToVector256Int32WithTruncation;
-                            break;
-                        case 64:
-                            convert = NI_AVX512F_ConvertToVector512Int32WithTruncation;
-                            break;
-                        default:
-                            unreached();
+                        var_types simdType = getSIMDTypeForSize(simdSize);
+                        // Generate the control table for VFIXUPIMMSD
+                        // The behavior we want is to map NaN inputs (QNAN/SNAN) to 0.
+                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
+
+                        // QNAN: 0b1000: Saturate to Zero
+                        // SNAN: 0b1000: Saturate to Zero
+                        // ZERO: 0b0000
+                        // +ONE: 0b0000
+                        // -INF: 0b0000
+                        // +INF: 0b0000
+                        // -VAL: 0b0000
+                        // +VAL: 0b0000
+                        for (int i = 0; i < 16; i++)
+                        {
+                            tbl->gtSimdVal.i32[i] = 0x00000088;
+                        }
+
+                        // Generate first operand
+                        // The logic is that first and second operand are basically the same because we want
+                        // the output to be in the same xmm register
+                        // Hence we clone the first operand
+                        GenTree* op2Clone = fgMakeMultiUse(&op1);
+                        // GenTree* op2Clone;
+                        // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
+                        //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
+
+                        // run vfixupimmsd base on table and no flags reporting
+                        GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
+                                                                         NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+
+                        GenTree* max_val =
+                            gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT32_MAX)),
+                                                         simdBaseJitType, simdSize);
+                        GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT),
+                                                                           CORINFO_TYPE_INT, simdSize);
+                        // we will be using the input value twice
+                        GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
+
+                        // usage 1 --> compare with max value of integer
+                        saturate_val =
+                            gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
+                        // cast it
+
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
+                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
+
+                        GenTree* retNode =
+                            gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
+
+                        // usage 2 --> use the compared mask with input value and max value to blend
+                        return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT,
+                                                   simdSize);
                     }
+#endif // TARGET_AMD64
+                    return nullptr;
+                }
 
-                    return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize);
+                case NI_VectorT_ConvertToDouble:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(varTypeIsLong(simdBaseType) || simdBaseType == TYP_FLOAT);
+                    if (varTypeIsLong(simdBaseType))
+                    {
+                        intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Double
+                                                     : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Double
+                                                                        : NI_AVX512DQ_ConvertToVector512Double;
+                    }
+                    else
+                    {
+                        intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Double
+                                                     : (simdSize == 32) ? NI_AVX_ConvertToVector256Double
+                                                                        : NI_AVX512F_ConvertToVector512Double;
+                    }
+                    return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
                 }
 
                 case NI_VectorT_ConvertToSingle:
                 {
-                    assert(simdBaseType == TYP_INT);
-                    NamedIntrinsic convert;
+                    assert(varTypeIsInt(simdBaseType));
+                    NamedIntrinsic intrinsic = NI_Illegal;
 
-                    switch (simdSize)
+                    if (simdBaseType == TYP_INT)
                     {
-                        case 16:
-                            convert = NI_SSE2_ConvertToVector128Single;
-                            break;
-                        case 32:
-                            convert = NI_AVX_ConvertToVector256Single;
-                            break;
-                        case 64:
-                            convert = NI_AVX512F_ConvertToVector512Single;
-                            break;
-                        default:
-                            unreached();
+                        intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Single
+                                                     : (simdSize == 32) ? NI_AVX_ConvertToVector256Single
+                                                                        : NI_AVX512F_ConvertToVector512Single;
                     }
-
-                    return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize);
+                    else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
+                    {
+                        intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single
+                                                     : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
+                                                                        : NI_AVX512F_ConvertToVector512Single;
+                    }
+                    return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
                 }
 #elif defined(TARGET_ARM64)
                 case NI_VectorT_ConvertToDouble:
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index 5b9e74e91b242..945a78333e8ef 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1537,6 +1537,7 @@ void HelperCallProperties::init()
             case CORINFO_HELP_LNG2DBL:
             case CORINFO_HELP_ULNG2DBL:
             case CORINFO_HELP_DBL2LNG:
+            case CORINFO_HELP_FLT2UINT:
             case CORINFO_HELP_DBL2ULNG:
             case CORINFO_HELP_FLTREM:
             case CORINFO_HELP_DBLREM:
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 4a8ca85aa3e58..fa2dbfc80168d 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -12776,6 +12776,11 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
             hasOverflowCheck = true;
             break;
 
+        case CORINFO_HELP_FLT2UINT:
+            castToType   = TYP_UINT;
+            castFromType = TYP_FLOAT;
+            break;
+
         case CORINFO_HELP_DBL2UINT_OVF:
             castToType       = TYP_UINT;
             castFromType     = TYP_DOUBLE;
@@ -13084,6 +13089,7 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
         case CORINFO_HELP_DBL2INT_OVF:
         case CORINFO_HELP_DBL2LNG:
         case CORINFO_HELP_DBL2LNG_OVF:
+        case CORINFO_HELP_FLT2UINT:
         case CORINFO_HELP_DBL2UINT_OVF:
         case CORINFO_HELP_DBL2ULNG:
         case CORINFO_HELP_DBL2ULNG_OVF:
diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index 6491813e3ed4e..cd25f6731b2b0 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -5,6 +5,53 @@
 #include "CommonMacros.h"
 #include "rhassert.h"
 
+//
+// Floating point and 64-bit integer math helpers.
+//
+
+
+EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    // Saturating double -> int64: NaN yields 0; values at or beyond the range limits clamp to INT64_MIN / INT64_MAX.
+    // (double)INT64_MAX rounds up to 2^63, so the upper test catches every value the cast cannot represent.
+    return (val >= (double)INT64_MAX) ? INT64_MAX : (val > (double)INT64_MIN) ? (int64_t)val : (val != val) ? 0 : INT64_MIN;
+#else
+    return (int64_t)val;
+#endif //HOST_X86 || HOST_AMD64
+}
+
+EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    // Saturating double -> int32: NaN yields 0, out-of-range values clamp to INT32_MIN / INT32_MAX.
+    const double int32_max_plus_1 = -(double)INT32_MIN; // 2^31; the cast is only in range below this bound
+    return (val != val) ? 0 : (val <= (double)INT32_MIN - 1.0) ? INT32_MIN : (val >= int32_max_plus_1) ? INT32_MAX : (int32_t)val;
+#else
+    return (int32_t)val;
+#endif //HOST_X86 || HOST_AMD64
+}
+
+EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    // Saturating double -> uint32: NaN and negatives yield 0 (same as RhpFlt2UInt); values >= 2^32 yield UINT32_MAX.
+    return (val != val || val < 0) ? 0 : (val >= -2.0 * (double)INT32_MIN) ? UINT32_MAX : (uint32_t)val;
+#else
+    return (uint32_t)val;
+#endif //HOST_X86 || HOST_AMD64
+}
+
+EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpFlt2UInt(float val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    // Saturating float -> uint32: NaN and negatives yield 0; values >= 2^32 yield UINT32_MAX.
+    return (val >= 4294967296.0f) ? UINT32_MAX : (val >= 0) ? (uint32_t)val : 0;
+#else
+    return (uint32_t)val;
+#endif //HOST_X86 || HOST_AMD64
+}
+
 #undef min
 #undef max
 #include <cmath>
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index a37945534865b..4e8a5bba4d5a8 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -311,6 +311,7 @@ public enum ReadyToRunHelper
         Dbl2UIntOvf                 = 0xD5,
         Dbl2ULng                    = 0xD6,
         Dbl2ULngOvf                 = 0xD7,
+        Flt2UInt                    = 0xD8,
 
         // Floating point ops
         DblRem                      = 0xE0,
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 5346806c1aff6..547ac19096cca 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -36,6 +36,7 @@ public enum CorInfoHelpFunc
         CORINFO_HELP_DBL2LNG_OVF,
         CORINFO_HELP_DBL2UINT,
         CORINFO_HELP_DBL2UINT_OVF,
+        CORINFO_HELP_FLT2UINT,
         CORINFO_HELP_DBL2ULNG,
         CORINFO_HELP_DBL2ULNG_OVF,
         CORINFO_HELP_FLTREM,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index 8d5b1357d9e3e..ce321a467b309 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -177,6 +177,9 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                 case ReadyToRunHelper.Dbl2ULng:
                     methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong");
                     break;
+                case ReadyToRunHelper.Flt2UInt:
+                    mangledName = "RhpFlt2UInt";
+                    break;
 
                 case ReadyToRunHelper.Dbl2IntOvf:
                     methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow");
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
index ad83b1eb42a5d..0524c68181cae 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
@@ -1150,6 +1150,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT:
                     id = ReadyToRunHelper.Dbl2UInt;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT:
+                    id = ReadyToRunHelper.Flt2UInt;
+                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF:
                     id = ReadyToRunHelper.Dbl2UIntOvf;
                     break;
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
index 0eae2f10cb8f0..022d17c662257 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
@@ -1909,6 +1909,10 @@ private void ParseHelper(StringBuilder builder)
                     builder.Append("DBL2UINT");
                     break;
 
+                case ReadyToRunHelper.Flt2UInt:
+                    builder.Append("FLT2UINT");
+                    break;
+
                 case ReadyToRunHelper.Dbl2UIntOvf:
                     builder.Append("DBL2UINTOVF");
                     break;
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
index 1d2f977c4c167..4484b90c0c035 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
@@ -678,6 +678,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT:
                     id = ReadyToRunHelper.Dbl2UInt;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT:
+                    id = ReadyToRunHelper.Flt2UInt;
+                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF:
                     id = ReadyToRunHelper.Dbl2UIntOvf;
                     break;

From 12cc04f589411853e157e079b3e5fceb41bbb8df Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Thu, 29 Feb 2024 16:11:24 -0800
Subject: [PATCH 05/14] Fixing behaviour in tests.

---
 .../out_of_range_fp_to_int_conversions.cpp    | 18 ++++-----------
 .../out_of_range_fp_to_int_conversions.cs     | 22 ++++---------------
 .../CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il |  3 +++
 .../CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il |  3 +++
 .../JitBlue/Runtime_62692/Runtime_62692.cs    |  4 ++--
 5 files changed, 16 insertions(+), 34 deletions(-)

diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index eaf7f2fa1a9da..de7f2c19577c1 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -30,12 +30,12 @@ extern "C" DLLEXPORT int32_t ConvertDoubleToInt32(double x, FPtoIntegerConversio
 
     switch (t) {
     case CONVERT_BACKWARD_COMPATIBLE:
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_SENTINEL:
         return ((x != x) || (x < INT32_MIN) || (x > INT32_MAX)) ? INT32_MIN : (int32_t)x;
 
     case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
     case CONVERT_SATURATING:
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return (x != x) ? 0 : (x < INT32_MIN) ? INT32_MIN : (x > INT32_MAX) ? INT32_MAX : (int32_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
@@ -53,7 +53,6 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers
     const double int64_max_plus_1 = 0x1.p63; // 0x43e0000000000000 // (uint64_t)INT64_MAX + 1;
 
     switch (t) {
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_BACKWARD_COMPATIBLE:
         return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? 0 : (uint32_t)(int64_t)x;
 
@@ -62,6 +61,7 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers
 
     case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
     case CONVERT_SATURATING:
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return ((x != x) || (x < 0)) ? 0 : (x > UINT32_MAX) ? UINT32_MAX : (uint32_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
@@ -95,7 +95,6 @@ extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversio
     const double int32_max_plus1 = ((double)INT32_MAX) + 1;
 
     switch (t) {
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_BACKWARD_COMPATIBLE:
     case CONVERT_SENTINEL:
         return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x;
@@ -111,6 +110,7 @@ extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversio
         }
 
     case CONVERT_SATURATING:
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return (x != x) ? 0 : (x < INT64_MIN) ? INT64_MIN : (x >= int64_max_plus_1) ? INT64_MAX : (int64_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
@@ -140,6 +140,7 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
         return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x;
 
     case CONVERT_SATURATING:
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return ((x != x) || (x < 0)) ? 0 : (x >= uint64_max_plus_1) ? UINT64_MAX : (uint64_t)x;
 
     case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
@@ -154,17 +155,6 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
             }
         }
 
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-        if (x < int64_max_plus_1)
-        {
-            return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x;
-        }
-        else
-        {
-            x -= int64_max_plus_1;
-            x = trunc(x);
-            return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
-        }
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index 5b78783c09e4c..1f75c4dbaef32 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -87,11 +87,11 @@ public static int ConvertDoubleToInt32(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x<int.MinValue) || (x > int.MaxValue)) ? int.MinValue: (int) x;
 
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                 case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
                     return Double.IsNaN(x) ? 0 : (x< int.MinValue) ? int.MinValue : (x > int.MaxValue) ? int.MaxValue : (int) x;
@@ -109,13 +109,13 @@ public static uint ConvertDoubleToUInt32(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? 0 : (uint)(long)x;
 
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x < 0) || (x > uint.MaxValue)) ? uint.MaxValue : (uint)x;
 
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                 case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
                     return (Double.IsNaN(x) || (x < 0)) ? 0 : (x > uint.MaxValue) ? uint.MaxValue : (uint)x;
@@ -136,7 +136,6 @@ public static long ConvertDoubleToInt64(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x;
@@ -151,6 +150,7 @@ public static long ConvertDoubleToInt64(double x, FPtoIntegerConversionType t)
                         return -(long)CppNativeArm32ConvertDoubleToUInt64(-x);
                     }
 
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                     return Double.IsNaN(x) ? 0 : (x < long.MinValue) ? long.MinValue : (x >= llong_max_plus_1) ? long.MaxValue : (long)x;
             }
@@ -185,6 +185,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)x;
 
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                     return (Double.IsNaN(x) || (x < 0)) ? 0 : (x >= ullong_max_plus_1) ? ulong.MaxValue : (ulong)x;
 
@@ -199,21 +200,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                             return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
                         }
                     }
-
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-
-                    if (x < two63)
-                    {
-                        return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x;
-                    }
-                    else
-                    {
-                        // (double)LLONG_MAX cannot be represented exactly as double
-                        const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1);
-                        x -= two63;
-                        x = Math.Truncate(x);
-                        return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
-                    }
             }
 
             return 0;
diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
index b8ccece0a1d6f..ff132dd868596 100644
--- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
+++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
@@ -48,6 +48,9 @@ End_Orphan_3:
 } catch [mscorlib]System.OverflowException {
   pop
   leave the_end
+} catch [mscorlib]System.DivideByZeroException {
+  pop
+  leave the_end
 }
 the_end:
 ldc.i4 100
diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
index 65f3bc2af34f6..0422a59b02052 100644
--- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
+++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
@@ -684,6 +684,9 @@ leave END
 } catch [mscorlib]System.OverflowException {
 pop
 leave END
+} catch [mscorlib]System.DivideByZeroException {
+  pop
+  leave END
 }
 END:
 ldc.i4 100
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
index 5b85cbb0115a0..22fb1e0bde69c 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
@@ -39,8 +39,8 @@ public static int TestEntryPoint()
             AssertEqual(Problem2(1111, 0xFFFF_FFFF_0000_0001), 3414328792);
             AssertEqual(Problem3(1, 0xFFFF_0001), 0);
             AssertEqual(Problem4(1111, 0xFFFF_FFFF_0000_0001), 3414328792);
-            AssertEqual(Problem5(1111, double.MaxValue), 3307008522);
-            AssertEqual(Problem6(1111, float.MaxValue), 3307008522);
+            AssertEqual(Problem5(1111, double.MaxValue), 1921271346);
+            AssertEqual(Problem6(1111, float.MaxValue), 1921271346);
             AssertEqual(Problem5(1111, double.MinValue), 3307008522);
             AssertEqual(Problem6(1111, float.MinValue), 3307008522);
             AssertEqual(Problem5(1111, -0.0), 3307008522);

From 5c1ebdbbb5f0eb4f3d21398c16cf633ac6297412 Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Fri, 1 Mar 2024 15:33:05 -0800
Subject: [PATCH 06/14] Merging with main Fixing edge cases

---
 src/coreclr/jit/morph.cpp                     |  3 +
 src/coreclr/jit/utils.cpp                     |  2 +
 src/coreclr/jit/valuenum.cpp                  | 18 +++++
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 65 +++++--------------
 .../ILCompiler.Compiler/Compiler/JitHelper.cs | 16 ++---
 src/coreclr/vm/corelib.h                      |  1 +
 src/coreclr/vm/ecall.cpp                      |  4 ++
 src/coreclr/vm/jithelpers.cpp                 |  6 +-
 .../System.Private.CoreLib/src/System/Math.cs | 16 ++++-
 9 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 4b58a8b6899e8..cc195cddd7223 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -447,6 +447,9 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
             }
             else if (srcType == TYP_DOUBLE && dstType == TYP_UINT)
             {
+                /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
+                tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT);
+                return fgMorphTree(tree);*/
                 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
             }
             else if (srcType == TYP_DOUBLE && dstType == TYP_INT)
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index 945a78333e8ef..9416cf0c061af 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1536,6 +1536,8 @@ void HelperCallProperties::init()
             case CORINFO_HELP_LMUL:
             case CORINFO_HELP_LNG2DBL:
             case CORINFO_HELP_ULNG2DBL:
+            case CORINFO_HELP_DBL2INT:
+            case CORINFO_HELP_DBL2UINT:
             case CORINFO_HELP_DBL2LNG:
             case CORINFO_HELP_FLT2UINT:
             case CORINFO_HELP_DBL2ULNG:
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index fa2dbfc80168d..2858ab7575c88 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -12740,6 +12740,7 @@ void Compiler::fgValueNumberCall(GenTreeCall* call)
 
 void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
 {
+    // printf("\n Deepak fgValueNumberCastHelper IN \n");
     CorInfoHelpFunc helpFunc         = eeGetHelperNum(call->gtCallMethHnd);
     var_types       castToType       = TYP_UNDEF;
     var_types       castFromType     = TYP_UNDEF;
@@ -12770,6 +12771,16 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
             castFromType = TYP_DOUBLE;
             break;
 
+        case CORINFO_HELP_DBL2INT:
+            castToType   = TYP_INT;
+            castFromType = TYP_DOUBLE;
+            break;
+
+        case CORINFO_HELP_DBL2UINT:
+            castToType   = TYP_UINT;
+            castFromType = TYP_DOUBLE;
+            break;
+
         case CORINFO_HELP_DBL2LNG_OVF:
             castToType       = TYP_LONG;
             castFromType     = TYP_DOUBLE;
@@ -13081,6 +13092,8 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc)
 bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
 {
     CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+    // printf("\n Deepak Call = [%06u], helpFunc = %d \n", dspTreeID(call), (int)helpFunc);
+    // printTreeID(call);
 
     switch (helpFunc)
     {
@@ -13088,6 +13101,8 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
         case CORINFO_HELP_ULNG2DBL:
         case CORINFO_HELP_DBL2INT_OVF:
         case CORINFO_HELP_DBL2LNG:
+        case CORINFO_HELP_DBL2INT:
+        case CORINFO_HELP_DBL2UINT:
         case CORINFO_HELP_DBL2LNG_OVF:
         case CORINFO_HELP_FLT2UINT:
         case CORINFO_HELP_DBL2UINT_OVF:
@@ -13117,7 +13132,10 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
         break;
 
         default:
+        {
+            // printf("\n Deepak DEFAULT!! \n");
             break;
+        }
     }
 
     bool pure        = s_helperCallProperties.IsPure(helpFunc);
diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index cd25f6731b2b0..120c18e48f17c 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -5,53 +5,6 @@
 #include "CommonMacros.h"
 #include "rhassert.h"
 
-//
-// Floating point and 64-bit integer math helpers.
-//
-
-
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
-{
-#if defined(HOST_X86) || defined(HOST_AMD64)
-    const double int64_min = (double)INT64_MIN;
-    const double int64_max = (double)INT64_MAX;
-    return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val;
-#else
-    return (int64_t)val;
-#endif //HOST_X86 || HOST_AMD64
-}
-
-EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val)
-{
-#if defined(HOST_X86) || defined(HOST_AMD64)
-    const double int32_min = (double)INT32_MIN - 1.0;
-    const double int32_max = -2.0 * (double)INT32_MIN;
-    return (val!= val) ? 0 : (val <= int32_min) ? INT32_MIN : (val >= int32_max) ? INT32_MAX : (int32_t)val;
-#else
-    return (int32_t)val;
-#endif //HOST_X86 || HOST_AMD64
-}
-
-EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val)
-{
-#if defined(HOST_X86) || defined(HOST_AMD64)
-    const double uint32_max_plus_1 = -2.0 * (double)INT32_MIN;
-    return (val < 0) ? 0 : (val != val || val >= uint32_max_plus_1) ? UINT32_MAX : (uint32_t)val;
-#else
-    return (uint32_t)val;
-#endif //HOST_X86 || HOST_AMD64
-}
-
-EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpFlt2UInt(float val)
-{
-#if defined(HOST_X86) || defined(HOST_AMD64)
-    const float uint32_max_plus_1 = -2.0 * (float)INT32_MIN;
-    return (val != val || val < 0) ? 0 : (val >= uint32_max_plus_1) ? UINT32_MAX : (uint32_t)val;
-#else
-    return (uint32_t)val;
-#endif //HOST_X86 || HOST_AMD64
-}
-
 #undef min
 #undef max
 #include <cmath>
@@ -129,14 +82,26 @@ EXTERN_C int64_t REDHAWK_CALLCONV RhpLLsh(int64_t i, int32_t j)
     return i << (j & 0x3f);
 }
 
+EXTERN_C double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val)
+{
+    return (double)val;
+}
+
 EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
 {
     return (int64_t)val;
 }
 
-EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val)
+#else // !HOST_ARM  -- NOTE(review): assumes the enclosing conditional tests HOST_ARM; confirm
+// Saturating double -> int64 conversion: NaN yields 0; inputs <= (double)INT64_MIN or >= (double)INT64_MAX clamp to INT64_MIN / INT64_MAX.
+EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
 {
-    return (double)val;
+    const double int64_min = (double)INT64_MIN;
+    const double int64_max = (double)INT64_MAX;
+    return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val;
 }
+#endif // HOST_ARM
+
+
+
 
-#endif // HOST_ARM
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index ce321a467b309..31434ca0f979b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -177,8 +177,15 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                 case ReadyToRunHelper.Dbl2ULng:
                     methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong");
                     break;
+                case ReadyToRunHelper.Dbl2UInt:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToUInt");
+                    break;
+                case ReadyToRunHelper.Dbl2Int:
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt");
+                    break;
                 case ReadyToRunHelper.Flt2UInt:
-                    mangledName = "RhpFlt2UInt";
+                    methodDesc = context.GetHelperEntryPoint("System", "Math", "FloatToUInt");
+                    //mangledName = "RhpFlt2UInt";
                     break;
 
                 case ReadyToRunHelper.Dbl2IntOvf:
@@ -200,13 +207,6 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                 case ReadyToRunHelper.FltRem:
                     methodDesc = context.GetHelperEntryPoint("System", "MathF", "FloatReminder");
                     break;
-
-                case ReadyToRunHelper.Dbl2Int:
-                    methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt");
-                    break;
-                case ReadyToRunHelper.Dbl2UInt:
-                    methodDesc = context.GetHelperEntryPoint("System", "MathF", "DoubleToUInt");
-                    break;
                 case ReadyToRunHelper.DblRound:
                     DefType doubleType = context.GetWellKnownType(WellKnownType.Double);
                     methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("Round",
diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h
index eef4d60b64b87..a77b7fbd56da1 100644
--- a/src/coreclr/vm/corelib.h
+++ b/src/coreclr/vm/corelib.h
@@ -274,6 +274,7 @@ DEFINE_METHOD(MATH,                 DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverfl
 DEFINE_METHOD(MATH,                 DOUBLE_REMINDER,        DoubleReminder,             NoSig)
 DEFINE_METHOD(MATH,                 DOUBLE_TO_INT,          DoubleToInt,                NoSig)
 DEFINE_METHOD(MATH,                 DOUBLE_TO_UINT,         DoubleToUInt,               NoSig)
+DEFINE_METHOD(MATH,                 FLOAT_TO_UINT,          FloatToUInt,                NoSig)
 DEFINE_METHOD(MATH,                 ROUND,                  Round,                      SM_Dbl_RetDbl)
 
 DEFINE_CLASS(MATHF,                 System,                 MathF)
diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp
index fa8cf590c28d6..6c0119a99b3de 100644
--- a/src/coreclr/vm/ecall.cpp
+++ b/src/coreclr/vm/ecall.cpp
@@ -205,6 +205,10 @@ void ECall::PopulateManagedHelpers()
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest);
 
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__FLOAT_TO_UINT));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_FLT2UINT, pDest);
+
     pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND));
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest);
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 092dc342547bb..70c1b30c4f3f3 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -388,8 +388,10 @@ HCIMPLEND
 HCIMPL1_V(INT64, JIT_Dbl2Lng, double val)
 {
     FCALL_CONTRACT;
-
-    return (INT64)val;
+    const double int64_min = (double)INT64_MIN;
+    const double int64_max = (double)INT64_MAX;
+    return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val;
+    // Replaces the former plain (INT64)val cast: NaN now yields 0 and inputs outside the range above clamp to INT64_MIN / INT64_MAX.
 }
 HCIMPLEND
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index 34211170f9c3d..c154343331a03 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1735,7 +1735,19 @@ private static double DoubleReminder(double dividend, double divisor)
             return FMod(dividend, divisor);
         }
 
-        private static int DoubleToInt(double val) => (int)(long)val;
-        private static uint DoubleToUInt(double val) => (uint)(long)val;
+        private static int DoubleToInt(double val)
+        {
+            return double.IsNaN(val) || ((long)val < int.MinValue) ? int.MinValue : ((long)val > int.MaxValue) ? int.MaxValue : (int)(long)val;
+        }
+        private static uint DoubleToUInt(double val)
+        {
+            return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val;
+        }
+
+        private static uint FloatToUInt(float val)
+        {
+            //return 0;
+            return float.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val;
+        }
     }
 }

From 3e006be4308e2d9057f8f1524579b8c1d59d6b1d Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Tue, 5 Mar 2024 12:01:23 -0800
Subject: [PATCH 07/14] cleanup morph and FloatToUInt. Also, handle negative
 infinity value when converting to uint/ulong

---
 docs/design/coreclr/botr/readytorun-format.md |  1 -
 src/coreclr/inc/corinfo.h                     |  1 -
 src/coreclr/inc/jithelpers.h                  |  1 -
 src/coreclr/inc/readytorun.h                  |  1 -
 src/coreclr/inc/readytorunhelpers.h           |  1 -
 src/coreclr/jit/hwintrinsicxarch.cpp          |  8 ++---
 src/coreclr/jit/morph.cpp                     | 32 ++++---------------
 src/coreclr/jit/simdashwintrinsic.cpp         | 16 +++++-----
 src/coreclr/jit/utils.cpp                     |  1 -
 src/coreclr/jit/valuenum.cpp                  |  6 ----
 .../Internal/Runtime/ReadyToRunConstants.cs   |  1 -
 .../Common/JitInterface/CorInfoHelpFunc.cs    |  1 -
 .../ILCompiler.Compiler/Compiler/JitHelper.cs |  4 ---
 .../JitInterface/CorInfoImpl.ReadyToRun.cs    |  3 --
 .../ReadyToRunSignature.cs                    |  4 ---
 .../JitInterface/CorInfoImpl.RyuJit.cs        |  3 --
 src/coreclr/vm/corelib.h                      |  1 -
 src/coreclr/vm/ecall.cpp                      |  4 ---
 .../System.Private.CoreLib/src/System/Math.cs |  6 ----
 19 files changed, 18 insertions(+), 77 deletions(-)

diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md
index c222ab299e54b..a9a5c8b916303 100644
--- a/docs/design/coreclr/botr/readytorun-format.md
+++ b/docs/design/coreclr/botr/readytorun-format.md
@@ -870,7 +870,6 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_Dbl2UIntOvf               = 0xD5,
     READYTORUN_HELPER_Dbl2ULng                  = 0xD6,
     READYTORUN_HELPER_Dbl2ULngOvf               = 0xD7,
-    READYTORUN_HELPER_Flt2UInt                  = 0xD8,
 
     // Floating point ops
     READYTORUN_HELPER_DblRem                    = 0xE0,
diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index a935dd2e53f2a..b4417e60af4d5 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -393,7 +393,6 @@ enum CorInfoHelpFunc
     CORINFO_HELP_DBL2LNG_OVF,
     CORINFO_HELP_DBL2UINT,              // unused
     CORINFO_HELP_DBL2UINT_OVF,
-    CORINFO_HELP_FLT2UINT,
     CORINFO_HELP_DBL2ULNG,
     CORINFO_HELP_DBL2ULNG_OVF,
     CORINFO_HELP_FLTREM,
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index 8cb878a14a210..e0f65ff5de908 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -59,7 +59,6 @@
     DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF,  NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_FLT2UINT,     NULL,               CORINFO_HELP_SIG_8_STACK) //Deepak
     DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG,     NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL,               CORINFO_HELP_SIG_8_STACK)
     DYNAMICJITHELPER(CORINFO_HELP_FLTREM,       NULL,               CORINFO_HELP_SIG_8_STACK)
diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index 0c84b7f3baef0..41a4aa251fa74 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -399,7 +399,6 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_Dbl2UIntOvf               = 0xD5,
     READYTORUN_HELPER_Dbl2ULng                  = 0xD6,
     READYTORUN_HELPER_Dbl2ULngOvf               = 0xD7,
-    READYTORUN_HELPER_Flt2UInt                  = 0xD8,
 
     // Floating point ops
     READYTORUN_HELPER_DblRem                    = 0xE0,
diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h
index 695d9d886e261..bbb586e8eb4a3 100644
--- a/src/coreclr/inc/readytorunhelpers.h
+++ b/src/coreclr/inc/readytorunhelpers.h
@@ -84,7 +84,6 @@ HELPER(READYTORUN_HELPER_Dbl2Lng,                   CORINFO_HELP_DBL2LNG,
 HELPER(READYTORUN_HELPER_Dbl2LngOvf,                CORINFO_HELP_DBL2LNG_OVF,                       )
 HELPER(READYTORUN_HELPER_Dbl2UInt,                  CORINFO_HELP_DBL2UINT,                          )
 HELPER(READYTORUN_HELPER_Dbl2UIntOvf,               CORINFO_HELP_DBL2UINT_OVF,                      )
-HELPER(READYTORUN_HELPER_Flt2UInt,                  CORINFO_HELP_FLT2UINT,                          )
 HELPER(READYTORUN_HELPER_Dbl2ULng,                  CORINFO_HELP_DBL2ULNG,                          )
 HELPER(READYTORUN_HELPER_Dbl2ULngOvf,               CORINFO_HELP_DBL2ULNG_OVF,                      )
 
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 7f6469f992916..da7f3af42334e 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1524,13 +1524,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 // SNAN: 0b1000
                 // ZERO: 0b0000:
                 // +ONE: 0b0000
-                // -INF: 0b0000
+                // -INF: 0b1000
                 // +INF: 0b0000
                 // -VAL: 0b1000: Saturate to Zero
                 // +VAL: 0b0000
                 for (int i = 0; i < 16; i++)
                 {
-                    tbl->gtSimdVal.i32[i] = 0x08000088;
+                    tbl->gtSimdVal.i32[i] = 0x08080088;
                 }
 
                 // Generate first operand
@@ -1573,13 +1573,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 // SNAN: 0b1000
                 // ZERO: 0b0000:
                 // +ONE: 0b0000
-                // -INF: 0b0000
+                // -INF: 0b1000
                 // +INF: 0b0000
                 // -VAL: 0b1000: Saturate to Zero
                 // +VAL: 0b0000
                 for (int i = 0; i < 8; i++)
                 {
-                    tbl->gtSimdVal.i64[i] = 0x08000088;
+                    tbl->gtSimdVal.i64[i] = 0x08080088;
                 }
 
                 // Generate first operand
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index cc195cddd7223..7f984051bab11 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -353,11 +353,11 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                     // SNAN: 0b1000
                     // ZERO: 0b0000:
                     // +ONE: 0b0000
-                    // -INF: 0b0000
+                    // -INF: 0b1000
                     // +INF: 0b0000
                     // -VAL: 0b1000: Saturate to Zero
                     // +VAL: 0b0000
-                    tbl->gtSimdVal.i32[0] = 0x08000088;
+                    tbl->gtSimdVal.i32[0] = 0x08080088;
 
                     // Generate first operand
                     // The logic is that first and second operand are basically the same because we want
@@ -440,22 +440,6 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                     return fgMorphTree(saturate_val);
                 }
             }
-            // does not work, need to convert into helper function
-            else if (srcType == TYP_FLOAT && dstType == TYP_UINT)
-            {
-                return fgMorphCastIntoHelper(tree, CORINFO_HELP_FLT2UINT, oper);
-            }
-            else if (srcType == TYP_DOUBLE && dstType == TYP_UINT)
-            {
-                /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
-                tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT);
-                return fgMorphTree(tree);*/
-                return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
-            }
-            else if (srcType == TYP_DOUBLE && dstType == TYP_INT)
-            {
-                return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
-            }
         }
     } while (false);
 
@@ -472,14 +456,13 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #elif defined(TARGET_AMD64)
             // Amd64: src = float, dst = uint64 or overflow conversion.
             // This goes through helper and hence src needs to be converted to double.
-            && (tree->gtOverflow() || ((dstType == TYP_INT || dstType == TYP_ULONG || dstType == TYP_LONG) &&
-                                       !compOpportunisticallyDependsOn(InstructionSet_AVX512F)))
+            && (tree->gtOverflow() || !compOpportunisticallyDependsOn(InstructionSet_AVX512F))
 #elif defined(TARGET_ARM)
             // Arm: src = float, dst = int64/uint64 or overflow conversion.
             && (tree->gtOverflow() || varTypeIsLong(dstType))
 #else
             // x86: src = float, dst = uint32/int64/uint64 or overflow conversion.
-            && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT))
+            && (tree->gtOverflow() || varTypeIsIntegral(dstType))
 #endif
                 )
         {
@@ -519,16 +502,13 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                     case TYP_UINT:
 #if defined(TARGET_ARM)
                         return nullptr;
-#else  // TARGET_X86
+#elif defined(TARGET_XARCH)
                         if (tree->IsSaturatedConversion())
                         {
                             return nullptr;
                         }
-                        /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG);
-                        tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT);
-                        return fgMorphTree(tree);*/
+#endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
-#endif // TARGET_X86
 
                     case TYP_LONG:
 #ifdef TARGET_XARCH
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index d1e157ab8dbef..1394501f7da9b 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -1256,17 +1256,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                         // The behavior we want is to saturate negative values to 0.
                         GenTreeVecCon* tbl = gtNewVconNode(simdType);
 
-                        // QNAN: 0b0000:
-                        // SNAN: 0b0000
+                        // QNAN: 0b1000:
+                        // SNAN: 0b1000
                         // ZERO: 0b0000:
                         // +ONE: 0b0000
-                        // -INF: 0b0000
+                        // -INF: 0b1000
                         // +INF: 0b0000
                         // -VAL: 0b1000: Saturate to Zero
                         // +VAL: 0b0000
                         for (int i = 0; i < 16; i++)
                         {
-                            tbl->gtSimdVal.i32[i] = 0x08000088;
+                            tbl->gtSimdVal.i32[i] = 0x08080088;
                         }
 
                         // Generate first operand
@@ -1302,17 +1302,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                         // The behavior we want is to saturate negative values to 0.
                         GenTreeVecCon* tbl = gtNewVconNode(simdType);
 
-                        // QNAN: 0b0000:
-                        // SNAN: 0b0000
+                        // QNAN: 0b1000:
+                        // SNAN: 0b1000
                         // ZERO: 0b0000:
                         // +ONE: 0b0000
-                        // -INF: 0b0000
+                        // -INF: 0b1000
                         // +INF: 0b0000
                         // -VAL: 0b1000: Saturate to Zero
                         // +VAL: 0b0000
                         for (int i = 0; i < 8; i++)
                         {
-                            tbl->gtSimdVal.i64[i] = 0x08000088;
+                            tbl->gtSimdVal.i64[i] = 0x08080088;
                         }
 
                         // Generate first operand
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index 9416cf0c061af..dc55e449c0ed3 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1539,7 +1539,6 @@ void HelperCallProperties::init()
             case CORINFO_HELP_DBL2INT:
             case CORINFO_HELP_DBL2UINT:
             case CORINFO_HELP_DBL2LNG:
-            case CORINFO_HELP_FLT2UINT:
             case CORINFO_HELP_DBL2ULNG:
             case CORINFO_HELP_FLTREM:
             case CORINFO_HELP_DBLREM:
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 2858ab7575c88..a5a3710d161ca 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -12787,11 +12787,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call)
             hasOverflowCheck = true;
             break;
 
-        case CORINFO_HELP_FLT2UINT:
-            castToType   = TYP_UINT;
-            castFromType = TYP_FLOAT;
-            break;
-
         case CORINFO_HELP_DBL2UINT_OVF:
             castToType       = TYP_UINT;
             castFromType     = TYP_DOUBLE;
@@ -13104,7 +13099,6 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
         case CORINFO_HELP_DBL2INT:
         case CORINFO_HELP_DBL2UINT:
         case CORINFO_HELP_DBL2LNG_OVF:
-        case CORINFO_HELP_FLT2UINT:
         case CORINFO_HELP_DBL2UINT_OVF:
         case CORINFO_HELP_DBL2ULNG:
         case CORINFO_HELP_DBL2ULNG_OVF:
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index 4e8a5bba4d5a8..a37945534865b 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -311,7 +311,6 @@ public enum ReadyToRunHelper
         Dbl2UIntOvf                 = 0xD5,
         Dbl2ULng                    = 0xD6,
         Dbl2ULngOvf                 = 0xD7,
-        Flt2UInt                    = 0xD8,
 
         // Floating point ops
         DblRem                      = 0xE0,
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 547ac19096cca..5346806c1aff6 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -36,7 +36,6 @@ public enum CorInfoHelpFunc
         CORINFO_HELP_DBL2LNG_OVF,
         CORINFO_HELP_DBL2UINT,
         CORINFO_HELP_DBL2UINT_OVF,
-        CORINFO_HELP_FLT2UINT,
         CORINFO_HELP_DBL2ULNG,
         CORINFO_HELP_DBL2ULNG_OVF,
         CORINFO_HELP_FLTREM,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index 31434ca0f979b..852d859b5a740 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -183,10 +183,6 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                 case ReadyToRunHelper.Dbl2Int:
                     methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt");
                     break;
-                case ReadyToRunHelper.Flt2UInt:
-                    methodDesc = context.GetHelperEntryPoint("System", "Math", "FloatToUInt");
-                    //mangledName = "RhpFlt2UInt";
-                    break;
 
                 case ReadyToRunHelper.Dbl2IntOvf:
                     methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow");
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
index 0524c68181cae..ad83b1eb42a5d 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
@@ -1150,9 +1150,6 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT:
                     id = ReadyToRunHelper.Dbl2UInt;
                     break;
-                case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT:
-                    id = ReadyToRunHelper.Flt2UInt;
-                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF:
                     id = ReadyToRunHelper.Dbl2UIntOvf;
                     break;
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
index 022d17c662257..0eae2f10cb8f0 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
@@ -1909,10 +1909,6 @@ private void ParseHelper(StringBuilder builder)
                     builder.Append("DBL2UINT");
                     break;
 
-                case ReadyToRunHelper.Flt2UInt:
-                    builder.Append("FLT2UINT");
-                    break;
-
                 case ReadyToRunHelper.Dbl2UIntOvf:
                     builder.Append("DBL2UINTOVF");
                     break;
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
index 4484b90c0c035..1d2f977c4c167 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
@@ -678,9 +678,6 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT:
                     id = ReadyToRunHelper.Dbl2UInt;
                     break;
-                case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT:
-                    id = ReadyToRunHelper.Flt2UInt;
-                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF:
                     id = ReadyToRunHelper.Dbl2UIntOvf;
                     break;
diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h
index a77b7fbd56da1..eef4d60b64b87 100644
--- a/src/coreclr/vm/corelib.h
+++ b/src/coreclr/vm/corelib.h
@@ -274,7 +274,6 @@ DEFINE_METHOD(MATH,                 DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverfl
 DEFINE_METHOD(MATH,                 DOUBLE_REMINDER,        DoubleReminder,             NoSig)
 DEFINE_METHOD(MATH,                 DOUBLE_TO_INT,          DoubleToInt,                NoSig)
 DEFINE_METHOD(MATH,                 DOUBLE_TO_UINT,         DoubleToUInt,               NoSig)
-DEFINE_METHOD(MATH,                 FLOAT_TO_UINT,          FloatToUInt,                NoSig)
 DEFINE_METHOD(MATH,                 ROUND,                  Round,                      SM_Dbl_RetDbl)
 
 DEFINE_CLASS(MATHF,                 System,                 MathF)
diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp
index 6c0119a99b3de..fa8cf590c28d6 100644
--- a/src/coreclr/vm/ecall.cpp
+++ b/src/coreclr/vm/ecall.cpp
@@ -205,10 +205,6 @@ void ECall::PopulateManagedHelpers()
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest);
 
-    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__FLOAT_TO_UINT));
-    pDest = pMD->GetMultiCallableAddrOfCode();
-    SetJitHelperFunction(CORINFO_HELP_FLT2UINT, pDest);
-
     pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND));
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index c154343331a03..d4073a5691911 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1743,11 +1743,5 @@ private static uint DoubleToUInt(double val)
         {
             return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val;
         }
-
-        private static uint FloatToUInt(float val)
-        {
-            //return 0;
-            return float.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val;
-        }
     }
 }

From 2cc89d772f7e08e2d0e573da8ae7dfbcd2b92b4b Mon Sep 17 00:00:00 2001
From: Deepak Rajendrakumaran <deepak.rajendrakumaran@intel.com>
Date: Tue, 5 Mar 2024 17:07:06 -0800
Subject: [PATCH 08/14] Handling NaN for double->ulong

---
 src/libraries/System.Private.CoreLib/src/System/Math.cs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index d4073a5691911..371f72d7bf099 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1624,6 +1624,9 @@ private static double ULongToDouble(ulong val)
 
         private static ulong DoubleToULong(double val)
         {
+            if (double.IsNaN(val))
+                return 0;
+
             const double two63 = 2147483648.0 * 4294967296.0;
             ulong ret;
             if (val < two63)

From 51bad69fcd7fd6a7b6a3c2c47994272c56023d85 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 6 Mar 2024 02:17:24 -0800
Subject: [PATCH 09/14] Move System.Math.DoubleToUlong to saturating behavior

---
 .../System.Private.CoreLib/src/System/Math.cs      | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index 371f72d7bf099..a0d5c007c9887 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1627,18 +1627,16 @@ private static ulong DoubleToULong(double val)
             if (double.IsNaN(val))
                 return 0;
 
-            const double two63 = 2147483648.0 * 4294967296.0;
-            ulong ret;
-            if (val < two63)
+            const double two64 = 4294967296.0 * 4294967296;
+            if (val <= 0.0)
             {
-                ret = (ulong)(long)val;
+                return 0;
             }
-            else
+            if (val >= two64)
             {
-                // subtract 0x8000000000000000, do the convert then add it back again
-                ret = (ulong)(long)(val - two63) + 0x8000000000000000UL;
+                return ulong.MaxValue;
             }
-            return ret;
+            return (ulong)(long)val;
         }
 
         [StackTraceHidden]

From 724be9131d28b6816848d183ccec4094c70d63c3 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 6 Mar 2024 15:23:15 -0800
Subject: [PATCH 10/14] Resolving non AVX512F failure

---
 .../System.Private.CoreLib/src/System/Math.cs      | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index a0d5c007c9887..c2967b21e30cc 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1624,19 +1624,27 @@ private static double ULongToDouble(ulong val)
 
         private static ulong DoubleToULong(double val)
         {
+            const double two63 = 2147483648.0 * 4294967296.0;
+#if TARGET_X86 || TARGET_AMD64
             if (double.IsNaN(val))
                 return 0;
 
-            const double two64 = 4294967296.0 * 4294967296;
+            const double two64 = 4294967296.0 * 4294967296.0;
             if (val <= 0.0)
             {
                 return 0;
             }
-            if (val >= two64)
+            else if (val >= two64)
             {
                 return ulong.MaxValue;
             }
-            return (ulong)(long)val;
+#endif //TARGET_X86 || TARGET_AMD64
+            if (val < two63)
+            {
+                return (ulong)(long)val;
+            }
+            // subtract 0x8000000000000000, do the convert then add it back again
+            return (ulong)(long)(val - two63) + 0x8000000000000000UL;
         }
 
         [StackTraceHidden]

From 46eb9c861f120439394d9cdd58c3d1c441a98150 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 6 Mar 2024 20:57:50 -0800
Subject: [PATCH 11/14] Fix DoubleToUint failures and also add doubleToUint
 helper in checks

---
 src/coreclr/vm/jitinterface.cpp                         | 2 ++
 src/libraries/System.Private.CoreLib/src/System/Math.cs | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index dbf4aa43de507..cb3eba7360c25 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -10697,6 +10697,8 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULMUL_OVF ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULNG2DBL ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT_OVF ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT_OVF ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2LNG_OVF ||
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index c2967b21e30cc..a4998d0784e1e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1744,10 +1744,13 @@ private static double DoubleReminder(double dividend, double divisor)
             return FMod(dividend, divisor);
         }
 
+        [StackTraceHidden]
         private static int DoubleToInt(double val)
         {
-            return double.IsNaN(val) || ((long)val < int.MinValue) ? int.MinValue : ((long)val > int.MaxValue) ? int.MaxValue : (int)(long)val;
+            return double.IsNaN(val) ? 0 : (val < -2147483648.0) ? int.MinValue : (val >= 2147483648.0) ? int.MaxValue : (int)(long)val;
         }
+
+        [StackTraceHidden]
         private static uint DoubleToUInt(double val)
         {
             return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val;

From 4ca8a860ce7f37b3874b7f4c8c01cb55a0991512 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Thu, 7 Mar 2024 00:14:50 -0800
Subject: [PATCH 12/14] Fix arm failure and nativeAOT

---
 .../IL/ILImporter.Scanner.cs                    | 17 +++++++++++++++++
 src/coreclr/vm/jithelpers.cpp                   |  7 +++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
index e152de9cb404a..9d172a763c76e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
@@ -1273,11 +1273,19 @@ private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool
             {
                 case WellKnownType.SByte:
                 case WellKnownType.Int16:
+                    if (checkOverflow)
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf");
+                    }
+                    break;
                 case WellKnownType.Int32:
                     if (checkOverflow)
                     {
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf");
                     }
+                    else{
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2Int), "conv_i4");
+                    }
                     break;
                 case WellKnownType.Int64:
                     if (checkOverflow)
@@ -1287,11 +1295,20 @@ private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool
                     break;
                 case WellKnownType.Byte:
                 case WellKnownType.UInt16:
+                    if (checkOverflow)
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u8_ovf");
+                    }
+                    break;
                 case WellKnownType.UInt32:
                     if (checkOverflow)
                     {
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u8_ovf");
                     }
+                    else
+                    {
+                        _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UInt), "conv_u4");
+                    }
                     break;
                 case WellKnownType.UInt64:
                     if (checkOverflow)
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 70c1b30c4f3f3..7e6118b3a8149 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -388,10 +388,13 @@ HCIMPLEND
 HCIMPL1_V(INT64, JIT_Dbl2Lng, double val)
 {
     FCALL_CONTRACT;
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
     const double int64_min = (double)INT64_MIN;
     const double int64_max = (double)INT64_MAX;
-    return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val;
-    //return (INT64)val;
+    return (val != val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val;
+#else // TARGET_ARM
+    return (INT64)val;
+#endif //TARGET_X86 || TARGET_AMD64
 }
 HCIMPLEND
 

From d47fd37f89d21faf174281d42e7eb81fbd5db3a0 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Thu, 7 Mar 2024 19:15:23 -0800
Subject: [PATCH 13/14] Add gtSimdCvtNode to cleanup the code and enable new
 implementation in X86

---
 src/coreclr/jit/compiler.h            |  12 ++
 src/coreclr/jit/gentree.cpp           | 123 +++++++++++++++
 src/coreclr/jit/hwintrinsicxarch.cpp  | 185 ++---------------------
 src/coreclr/jit/simdashwintrinsic.cpp | 206 +++-----------------------
 4 files changed, 170 insertions(+), 356 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 3e789dc3a474c..aebb563ca3c8d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3172,6 +3172,18 @@ class Compiler
                                  GenTree*    op3,
                                  CorInfoType simdBaseJitType,
                                  unsigned    simdSize);
+    
+    GenTree* gtNewSimdCvtNode(var_types              type,
+                              GenTree*               op1,
+                              NamedIntrinsic         hwIntrinsicID,
+                              CorInfoType            simdTargetBaseJitType,
+                              CorInfoType            simdSourceBaseJitType,
+                              unsigned               simdSize);
+
+    GenTreeVecCon* gtCvtCtrlTbl(var_types   type,
+                                var_types   sourceType,
+                                var_types   targetType,
+                                unsigned    simdSize);
 
     GenTree* gtNewSimdCreateBroadcastNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index ffb461ec4d829..0a3a8e89a8c4a 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -21251,6 +21251,129 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
 
+#if defined(TARGET_XARCH)
+// gtCvtCtrlTbl: builds the vector constant used as the control table of the NI_AVX512F_Fixup
+// node that precedes a floating-point -> integer SIMD conversion.
+//
+//    type       - SIMD type of the table; must be the SIMD type for simdSize
+//    sourceType - floating-point element type being converted (TYP_FLOAT or TYP_DOUBLE)
+//    targetType - integral element type being produced (TYP_INT, TYP_UINT, TYP_LONG or TYP_ULONG)
+//    simdSize   - vector size; only consulted by the asserts below
+//
+// Returns the table node. One identical control word is stored per source-sized lane; eight
+// 64-bit or sixteen 32-bit lanes are always written, regardless of simdSize.
+GenTreeVecCon* Compiler::gtCvtCtrlTbl
+    (var_types type, var_types   sourceType, var_types   targetType, unsigned    simdSize)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+    assert(IsBaselineVector512IsaSupportedDebugOnly());
+    assert(varTypeIsFloating(sourceType));
+    assert(varTypeIsIntegral(targetType));
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+
+    GenTreeVecCon* tbl = gtNewVconNode(type);
+
+    // Every 4-bit field of the control word picks the response for one class of input:
+    //   QNAN (bits 0-3), SNAN (4-7), ZERO (8-11), +ONE (12-15),
+    //   -INF (16-19), +INF (20-23), -VAL (24-27), +VAL (28-31)
+    // 0b1000 saturates that class to zero; 0b0000 keeps the value of the first fixup operand.
+    int ctrlWord = 0;
+    switch (targetType)
+    {
+        case TYP_UINT:
+        case TYP_ULONG:
+            // QNAN, SNAN, -INF and -VAL all become zero for unsigned targets.
+            ctrlWord = 0x08080088;
+            break;
+
+        case TYP_INT:
+        case TYP_LONG:
+            // Only QNAN and SNAN become zero for signed targets.
+            ctrlWord = 0x00000088;
+            break;
+
+        default:
+            unreached();
+    }
+
+    if (sourceType == TYP_DOUBLE)
+    {
+        // One control word per 64-bit lane (the upper half stays zero).
+        for (int lane = 0; lane < 8; lane++)
+        {
+            tbl->gtSimdVal.i64[lane] = ctrlWord;
+        }
+    }
+    else if (sourceType == TYP_FLOAT)
+    {
+        // One control word per 32-bit lane.
+        for (int lane = 0; lane < 16; lane++)
+        {
+            tbl->gtSimdVal.i32[lane] = ctrlWord;
+        }
+    }
+    else
+    {
+        unreached();
+    }
+
+    return tbl;
+}
+
+// gtNewSimdCvtNode: creates a saturating floating-point -> integer SIMD conversion of op1.
+//    type, simdSize        - SIMD type and vector size of op1 and of the result
+//    hwIntrinsicID         - conversion intrinsic applied to the fixed-up input
+//    simdTargetBaseJitType - integral element type of the result
+//    simdSourceBaseJitType - floating-point element type of op1
+// Returns the conversion; for signed targets, lanes >= the target's max value yield that max.
+GenTree* Compiler::gtNewSimdCvtNode
+    (var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, CorInfoType simdTargetBaseJitType, CorInfoType simdSourceBaseJitType, unsigned simdSize)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+    assert(IsBaselineVector512IsaSupportedDebugOnly());
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(type));
+
+    var_types simdSourceBaseType = JitType2PreciseVarType(simdSourceBaseJitType);
+    var_types simdTargetBaseType = JitType2PreciseVarType(simdTargetBaseJitType);
+    assert(varTypeIsArithmetic(simdSourceBaseType));
+    assert(varTypeIsArithmetic(simdTargetBaseType));
+    assert(varTypeIsFloating(simdSourceBaseType));
+    assert(varTypeIsIntegral(simdTargetBaseType));
+
+    // Run the fixup with op1 as both inputs, using the control table and no flag reporting.
+    GenTreeVecCon* tbl      = gtCvtCtrlTbl(type, simdSourceBaseType, simdTargetBaseType, simdSize);
+    GenTree*       op1Clone = fgMakeMultiUse(&op1);
+    GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0),
+                                                 NI_AVX512F_Fixup, simdSourceBaseJitType, simdSize);
+
+    if (!varTypeIsSigned(simdTargetBaseType))
+    {
+        return gtNewSimdHWIntrinsicNode(type, fixupVal, hwIntrinsicID, simdSourceBaseJitType, simdSize);
+    }
+
+    // Keep the limit in an int64_t: ssize_t is 32 bits wide on 32-bit hosts and would truncate INT64_MAX.
+    const int64_t actualMaxVal = varTypeIsInt(simdTargetBaseType) ? INT32_MAX : INT64_MAX;
+    GenTree* maxVal = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
+    maxVal          = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
+
+    // A 64-bit limit is built with gtNewLconNode because gtNewIconNode takes an ssize_t.
+    GenTree* maxValDup = varTypeIsLong(simdTargetBaseType)
+                             ? gtNewLconNode(actualMaxVal)
+                             : gtNewIconNode(static_cast<ssize_t>(actualMaxVal), simdTargetBaseType);
+    maxValDup = gtNewSimdCreateBroadcastNode(type, maxValDup, simdTargetBaseJitType, simdSize);
+
+    // The fixed-up input is used twice: compared against the limit to build the select mask, and converted.
+    GenTree* fixupValDup = fgMakeMultiUse(&fixupVal);
+    fixupVal             = gtNewSimdCmpOpNode(GT_GE, type, fixupVal, maxVal, simdSourceBaseJitType, simdSize);
+    GenTree* castNode    = gtNewSimdHWIntrinsicNode(type, fixupValDup, hwIntrinsicID, simdSourceBaseJitType, simdSize);
+    return gtNewSimdCndSelNode(type, fixupVal, maxValDup, castNode, simdTargetBaseJitType, simdSize);
+}
+#endif //TARGET_XARCH
+
 GenTree* Compiler::gtNewSimdCmpOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index da7f3af42334e..3b9236ec8906e 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1446,61 +1446,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
-#ifdef TARGET_AMD64
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 op1 = impSIMDPopStack();
 
-                var_types simdType = getSIMDTypeForSize(simdSize);
-                // Generate the control table for VFIXUPIMMSD
-                // The behavior we want is to saturate negative values to 0.
-                GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                // QNAN: 0b1000: Saturate to Zero
-                // SNAN: 0b1000: Saturate to Zero
-                // ZERO: 0b0000
-                // +ONE: 0b0000
-                // -INF: 0b0000
-                // +INF: 0b0000
-                // -VAL: 0b0000
-                // +VAL: 0b0000
-                for (int i = 0; i < 8; i++)
-                {
-                    tbl->gtSimdVal.i64[i] = 0x00000088;
-                }
-
-                // Generate first operand
-                // The logic is that first and second operand are basically the same because we want
-                // the output to be in the same xmm register
-                // Hence we clone the first operand
-                GenTree* op2Clone = fgMakeMultiUse(&op1);
-
-                // run vfixupimmsd base on table and no flags reporting
-                GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                 NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
-                GenTree* max_val =
-                    gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast<double>(INT64_MAX)),
-                                                 simdBaseJitType, simdSize);
-                GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG),
-                                                                   CORINFO_TYPE_LONG, simdSize);
-                // we will be using the input value twice
-                GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
-
-                // usage 1 --> compare with max value of integer
-                saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
-                // cast it
-
                 intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
-                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
-                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+                          : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                             : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
 
-                retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
-
-                // usage 2 --> use thecompared mask with input value and max value to blend
-                retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, simdSize);
+                retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
             }
-#endif // TARGET_AMD64
             break;
         }
 
@@ -1509,47 +1464,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector512_ConvertToUInt32:
         {
             assert(sig->numArgs == 1);
-            assert(varTypeIsFloating(simdBaseType));
-#ifdef TARGET_AMD64
+            assert(simdBaseType == TYP_FLOAT);
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 op1 = impSIMDPopStack();
-
-                var_types simdType = getSIMDTypeForSize(simdSize);
-                // Generate the control table for VFIXUPIMMSD
-                // The behavior we want is to saturate negative values to 0.
-                GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                // QNAN: 0b1000:
-                // SNAN: 0b1000
-                // ZERO: 0b0000:
-                // +ONE: 0b0000
-                // -INF: 0b1000
-                // +INF: 0b0000
-                // -VAL: 0b1000: Saturate to Zero
-                // +VAL: 0b0000
-                for (int i = 0; i < 16; i++)
-                {
-                    tbl->gtSimdVal.i32[i] = 0x08080088;
-                }
-
-                // Generate first operand
-                // The logic is that first and second operand are basically the same because we want
-                // the output to be in the same xmm register
-                // Hence we clone the first operand
-                GenTree* op2Clone = fgMakeMultiUse(&op1);
-
-                // run vfixupimmsd base on table and no flags reporting
-                GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                             NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
                 intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
-                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
-                                                                : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+                          : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                             : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
 
-                retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
             }
-#endif // TARGET_AMD64
             break;
         }
 
@@ -1559,46 +1483,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
-#ifdef TARGET_AMD64
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 op1 = impSIMDPopStack();
-
-                var_types simdType = getSIMDTypeForSize(simdSize);
-                // Generate the control table for VFIXUPIMMSD
-                // The behavior we want is to saturate negative values to 0.
-                GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                // QNAN: 0b1000:
-                // SNAN: 0b1000
-                // ZERO: 0b0000:
-                // +ONE: 0b0000
-                // -INF: 0b1000
-                // +INF: 0b0000
-                // -VAL: 0b1000: Saturate to Zero
-                // +VAL: 0b0000
-                for (int i = 0; i < 8; i++)
-                {
-                    tbl->gtSimdVal.i64[i] = 0x08080088;
-                }
-
-                // Generate first operand
-                // The logic is that first and second operand are basically the same because we want
-                // the output to be in the same xmm register
-                // Hence we clone the first operand
-                GenTree* op2Clone = fgMakeMultiUse(&op1);
-
-                // run vfixupimmsd base on table and no flags reporting
-                GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                             NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
                 intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
-                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
-                                                                : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+                          : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                             : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
 
-                retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
             }
-#endif // TARGET_AMD64
             break;
         }
 
@@ -1608,63 +1501,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_FLOAT);
-#ifdef TARGET_AMD64
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 op1 = impSIMDPopStack();
-
-                var_types simdType = getSIMDTypeForSize(simdSize);
-                // Generate the control table for VFIXUPIMMSD
-                // The behavior we want is to saturate negative values to 0.
-                GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                // QNAN: 0b1000: Saturate to Zero
-                // SNAN: 0b1000: Saturate to Zero
-                // ZERO: 0b0000
-                // +ONE: 0b0000
-                // -INF: 0b0000
-                // +INF: 0b0000
-                // -VAL: 0b0000
-                // +VAL: 0b0000
-                for (int i = 0; i < 16; i++)
-                {
-                    tbl->gtSimdVal.i32[i] = 0x00000088;
-                }
-
-                // Generate first operand
-                // The logic is that first and second operand are basically the same because we want
-                // the output to be in the same xmm register
-                // Hence we clone the first operand
-                GenTree* op2Clone = fgMakeMultiUse(&op1);
-                // GenTree* op2Clone;
-                // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
-                //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
-
-                // run vfixupimmsd base on table and no flags reporting
-                GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                 NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
-                GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT32_MAX)),
-                                                                simdBaseJitType, simdSize);
-                GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT),
-                                                                   CORINFO_TYPE_INT, simdSize);
-                // we will be using the input value twice
-                GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
-
-                // usage 1 --> compare with max value of integer
-                saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
-                // cast it
-
                 intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
-                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
-                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
-
-                retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
+                          : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                             : NI_AVX512F_ConvertToVector512Int32WithTruncation;
 
-                // usage 2 --> use thecompared mask with input value and max value to blend
-                retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, simdSize);
+                retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
             }
-#endif // TARGET_AMD64
             break;
         }
 
@@ -1693,8 +1538,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
             {
                 intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single
-                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
-                                                                : NI_AVX512F_ConvertToVector512Single;
+                          : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
+                                             : NI_AVX512F_ConvertToVector512Single;
             }
             if (intrinsic != NI_Illegal)
             {
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index 1394501f7da9b..7cdad95169262 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -532,12 +532,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
         case NI_VectorT_ConvertToUInt32:
         case NI_VectorT_ConvertToUInt64:
         {
-#ifdef TARGET_AMD64
+#ifdef TARGET_XARCH
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 break;
             }
-#endif // TARGET_AMD64
+#endif // TARGET_XARCH
             return nullptr;
         }
 
@@ -1181,224 +1181,58 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                 {
                     assert(sig->numArgs == 1);
                     assert(simdBaseType == TYP_DOUBLE);
-#ifdef TARGET_AMD64
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        var_types simdType = getSIMDTypeForSize(simdSize);
-                        // Generate the control table for VFIXUPIMMSD
-                        // The behavior we want is to saturate negative values to 0.
-                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                        // QNAN: 0b1000: Saturate to Zero
-                        // SNAN: 0b1000: Saturate to Zero
-                        // ZERO: 0b0000
-                        // +ONE: 0b0000
-                        // -INF: 0b0000
-                        // +INF: 0b0000
-                        // -VAL: 0b0000
-                        // +VAL: 0b0000
-                        for (int i = 0; i < 8; i++)
-                        {
-                            tbl->gtSimdVal.i64[i] = 0x00000088;
-                        }
+                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
+                                                 : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                                                    : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
 
-                        // Generate first operand
-                        // The logic is that first and second operand are basically the same because we want
-                        // the output to be in the same xmm register
-                        // Hence we clone the first operand
-                        GenTree* op2Clone = fgMakeMultiUse(&op1);
-                        // GenTree* op2Clone;
-                        // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
-                        //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
-
-                        // run vfixupimmsd base on table and no flags reporting
-                        GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                         NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
-                        GenTree* max_val =
-                            gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast<double>(INT64_MAX)),
-                                                         simdBaseJitType, simdSize);
-                        GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG),
-                                                                           CORINFO_TYPE_LONG, simdSize);
-                        // we will be using the input value twice
-                        GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
-
-                        // usage 1 --> compare with max value of integer
-                        saturate_val =
-                            gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
-                        // cast it
-
-                        NamedIntrinsic intrinsic =
-                            (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
-                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
-                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
-
-                        GenTree* retNode =
-                            gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
-
-                        // usage 2 --> use thecompared mask with input value and max value to blend
-                        return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG,
-                                                   simdSize);
+                        return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
                     }
-#endif // TARGET_AMD64
                     return nullptr;
                 }
 
                 case NI_VectorT_ConvertToUInt32:
                 {
                     assert(sig->numArgs == 1);
-                    assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT));
-#ifdef TARGET_AMD64
+                    assert(simdBaseType == TYP_FLOAT);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        var_types simdType = getSIMDTypeForSize(simdSize);
-                        // Generate the control table for VFIXUPIMMSD
-                        // The behavior we want is to saturate negative values to 0.
-                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                        // QNAN: 0b1000:
-                        // SNAN: 0b1000
-                        // ZERO: 0b0000:
-                        // +ONE: 0b0000
-                        // -INF: 0b1000
-                        // +INF: 0b0000
-                        // -VAL: 0b1000: Saturate to Zero
-                        // +VAL: 0b0000
-                        for (int i = 0; i < 16; i++)
-                        {
-                            tbl->gtSimdVal.i32[i] = 0x08080088;
-                        }
-
-                        // Generate first operand
-                        // The logic is that first and second operand are basically the same because we want
-                        // the output to be in the same xmm register
-                        // Hence we clone the first operand
-                        GenTree* op2Clone = fgMakeMultiUse(&op1);
-
-                        // run vfixupimmsd base on table and no flags reporting
-                        GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                     NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
-                        intrinsic = (simdSize == 16)
-                                        ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
-                                        : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
-                                                           : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
+                                                 : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                                                    : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
 
-                        return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                        return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
                     }
-#endif // TARGET_AMD64
                     return nullptr;
                 }
 
                 case NI_VectorT_ConvertToUInt64:
                 {
                     assert(sig->numArgs == 1);
-                    assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT));
-#ifdef TARGET_AMD64
+                    assert(simdBaseType == TYP_DOUBLE);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        var_types simdType = getSIMDTypeForSize(simdSize);
-                        // Generate the control table for VFIXUPIMMSD
-                        // The behavior we want is to saturate negative values to 0.
-                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                        // QNAN: 0b1000:
-                        // SNAN: 0b1000
-                        // ZERO: 0b0000:
-                        // +ONE: 0b0000
-                        // -INF: 0b1000
-                        // +INF: 0b0000
-                        // -VAL: 0b1000: Saturate to Zero
-                        // +VAL: 0b0000
-                        for (int i = 0; i < 8; i++)
-                        {
-                            tbl->gtSimdVal.i64[i] = 0x08080088;
-                        }
-
-                        // Generate first operand
-                        // The logic is that first and second operand are basically the same because we want
-                        // the output to be in the same xmm register
-                        // Hence we clone the first operand
-                        GenTree* op2Clone = fgMakeMultiUse(&op1);
-
-                        // run vfixupimmsd base on table and no flags reporting
-                        GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                     NI_AVX512F_Fixup, simdBaseJitType, simdSize);
+                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
+                                                 : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                                                    : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
 
-                        intrinsic = (simdSize == 16)
-                                        ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
-                                        : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
-                                                           : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
-
-                        return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize);
+                        return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
                     }
-#endif // TARGET_AMD64
                     return nullptr;
                 }
 
                 case NI_VectorT_ConvertToInt32:
                 {
                     assert(simdBaseType == TYP_FLOAT);
-#ifdef TARGET_AMD64
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        var_types simdType = getSIMDTypeForSize(simdSize);
-                        // Generate the control table for VFIXUPIMMSD
-                        // The behavior we want is to saturate negative values to 0.
-                        GenTreeVecCon* tbl = gtNewVconNode(simdType);
-
-                        // QNAN: 0b1000: Saturate to Zero
-                        // SNAN: 0b1000: Saturate to Zero
-                        // ZERO: 0b0000
-                        // +ONE: 0b0000
-                        // -INF: 0b0000
-                        // +INF: 0b0000
-                        // -VAL: 0b0000
-                        // +VAL: 0b0000
-                        for (int i = 0; i < 16; i++)
-                        {
-                            tbl->gtSimdVal.i32[i] = 0x00000088;
-                        }
+                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
+                                                 : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                                                    : NI_AVX512F_ConvertToVector512Int32WithTruncation;
 
-                        // Generate first operand
-                        // The logic is that first and second operand are basically the same because we want
-                        // the output to be in the same xmm register
-                        // Hence we clone the first operand
-                        GenTree* op2Clone = fgMakeMultiUse(&op1);
-                        // GenTree* op2Clone;
-                        // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL,
-                        //                     nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion"));
-
-                        // run vfixupimmsd base on table and no flags reporting
-                        GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                         NI_AVX512F_Fixup, simdBaseJitType, simdSize);
-
-                        GenTree* max_val =
-                            gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT32_MAX)),
-                                                         simdBaseJitType, simdSize);
-                        GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT),
-                                                                           CORINFO_TYPE_INT, simdSize);
-                        // we will be using the input value twice
-                        GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
-
-                        // usage 1 --> compare with max value of integer
-                        saturate_val =
-                            gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
-                        // cast it
-
-                        NamedIntrinsic intrinsic =
-                            (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
-                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
-                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
-
-                        GenTree* retNode =
-                            gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize);
-
-                        // usage 2 --> use thecompared mask with input value and max value to blend
-                        return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT,
-                                                   simdSize);
+                        return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
                     }
-#endif // TARGET_AMD64
                     return nullptr;
                 }
 

From 71201e2f0dea3ff76a816b41ca14c3119b4e53de Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 8 Mar 2024 01:17:55 -0800
Subject: [PATCH 14/14] Enable the accelerated instructions for conversion for
 X86

---
 src/coreclr/jit/compiler.h            |  4 +-
 src/coreclr/jit/gentree.cpp           | 54 +++++++++++++++++----------
 src/coreclr/jit/hwintrinsicxarch.cpp  | 27 +++++++-------
 src/coreclr/jit/simdashwintrinsic.cpp | 28 ++++++++------
 4 files changed, 66 insertions(+), 47 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index aebb563ca3c8d..d5d4f01ccb59d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3172,7 +3172,7 @@ class Compiler
                                  GenTree*    op3,
                                  CorInfoType simdBaseJitType,
                                  unsigned    simdSize);
-    
+#if defined(TARGET_XARCH)
     GenTree* gtNewSimdCvtNode(var_types              type,
                               GenTree*               op1,
                               NamedIntrinsic         hwIntrinsicID,
@@ -3184,7 +3184,7 @@ class Compiler
                                 var_types   sourceType,
                                 var_types   targetType,
                                 unsigned    simdSize);
-
+#endif // TARGET_XARCH
     GenTree* gtNewSimdCreateBroadcastNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 0a3a8e89a8c4a..1ecf5058ea7fe 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -21252,8 +21252,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
 }
 
 #if defined(TARGET_XARCH)
-GenTreeVecCon* Compiler::gtCvtCtrlTbl
-    (var_types type, var_types   sourceType, var_types   targetType, unsigned    simdSize)
+GenTreeVecCon* Compiler::gtCvtCtrlTbl(var_types type, var_types sourceType, var_types targetType, unsigned simdSize)
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(IsBaselineVector512IsaSupportedDebugOnly());
@@ -21262,7 +21261,6 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
 
-
     GenTreeVecCon* tbl = gtNewVconNode(type);
 
     switch (sourceType)
@@ -21277,7 +21275,7 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl
                         tbl->gtSimdVal.i64[i] = 0x08080088;
                     }
                     break;
-                
+
                 case TYP_INT:
                 case TYP_LONG:
                     for (int i = 0; i < 8; i++)
@@ -21285,12 +21283,12 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl
                         tbl->gtSimdVal.i64[i] = 0x00000088;
                     }
                     break;
-                
+
                 default:
                     unreached();
             }
             break;
-        
+
         case TYP_FLOAT:
             switch (targetType)
             {
@@ -21301,7 +21299,7 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl
                         tbl->gtSimdVal.i32[i] = 0x08080088;
                     }
                     break;
-                
+
                 case TYP_INT:
                 case TYP_LONG:
                     for (int i = 0; i < 16; i++)
@@ -21309,20 +21307,24 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl
                         tbl->gtSimdVal.i32[i] = 0x00000088;
                     }
                     break;
-                
+
                 default:
                     unreached();
             }
             break;
-        
+
         default:
             unreached();
     }
     return tbl;
 }
 
-GenTree* Compiler::gtNewSimdCvtNode
-    (var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, CorInfoType simdTargetBaseJitType, CorInfoType simdSourceBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdCvtNode(var_types      type,
+                                    GenTree*       op1,
+                                    NamedIntrinsic hwIntrinsicID,
+                                    CorInfoType    simdTargetBaseJitType,
+                                    CorInfoType    simdSourceBaseJitType,
+                                    unsigned       simdSize)
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(IsBaselineVector512IsaSupportedDebugOnly());
@@ -21343,17 +21345,29 @@ GenTree* Compiler::gtNewSimdCvtNode
     GenTree* op1Clone = fgMakeMultiUse(&op1);
 
     // run vfixupimmsd base on table and no flags reporting
-    GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0),
-                                                 NI_AVX512F_Fixup, simdSourceBaseJitType, simdSize);
+    GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0), NI_AVX512F_Fixup,
+                                                 simdSourceBaseJitType, simdSize);
 
     if (varTypeIsSigned(simdTargetBaseType))
     {
-        ssize_t actualMaxVal = (varTypeIsInt(simdTargetBaseType)) ? INT32_MAX : INT64_MAX;
-
-        GenTree* maxVal = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
-
-        maxVal = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
-        GenTree* maxValDup = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal, simdTargetBaseType), simdTargetBaseJitType, simdSize);
+        GenTree* maxVal;
+        GenTree* maxValDup;
+        if (varTypeIsLong(simdTargetBaseType))
+        {
+            long long actualMaxVal = INT64_MAX;
+            maxVal                 = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
+            maxVal                 = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
+            maxValDup =
+                gtNewSimdCreateBroadcastNode(type, gtNewLconNode(actualMaxVal), simdTargetBaseJitType, simdSize);
+        }
+        else
+        {
+            ssize_t actualMaxVal = INT32_MAX;
+            maxVal               = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
+            maxVal               = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
+            maxValDup            = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal, simdTargetBaseType),
+                                                     simdTargetBaseJitType, simdSize);
+        }
 
         // we will be using the input value twice
         GenTree* fixupValDup = fgMakeMultiUse(&fixupVal);
@@ -21372,7 +21386,7 @@ GenTree* Compiler::gtNewSimdCvtNode
         return gtNewSimdHWIntrinsicNode(type, fixupVal, hwIntrinsicID, simdSourceBaseJitType, simdSize);
     }
 }
-#endif //TARGET_XARCH
+#endif // TARGET_XARCH
 
 GenTree* Compiler::gtNewSimdCmpOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 3b9236ec8906e..b4a8e6495b1ff 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1451,8 +1451,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 op1 = impSIMDPopStack();
 
                 intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
-                          : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
-                                             : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
 
                 retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
             }
@@ -1467,13 +1467,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             assert(simdBaseType == TYP_FLOAT);
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
-                op1 = impSIMDPopStack();
+                op1       = impSIMDPopStack();
                 intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
-                          : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
-                                             : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
 
                 retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
             }
             break;
         }
 
@@ -1485,10 +1486,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             assert(simdBaseType == TYP_DOUBLE);
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
-                op1 = impSIMDPopStack();
+                op1       = impSIMDPopStack();
                 intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
-                          : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
-                                             : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
 
                 retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
             }
@@ -1503,10 +1504,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             assert(simdBaseType == TYP_FLOAT);
             if (IsBaselineVector512IsaSupportedOpportunistically())
             {
-                op1 = impSIMDPopStack();
+                op1       = impSIMDPopStack();
                 intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
-                          : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
-                                             : NI_AVX512F_ConvertToVector512Int32WithTruncation;
+                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
 
                 retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
             }
@@ -1538,8 +1539,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
             {
                 intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single
-                          : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
-                                             : NI_AVX512F_ConvertToVector512Single;
+                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single
+                                                                : NI_AVX512F_ConvertToVector512Single;
             }
             if (intrinsic != NI_Illegal)
             {
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index 7cdad95169262..bad5f1c1111ed 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -1183,9 +1183,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                     assert(simdBaseType == TYP_DOUBLE);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
-                                                 : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
-                                                                    : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
 
                         return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
                     }
@@ -1198,9 +1199,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                     assert(simdBaseType == TYP_FLOAT);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
-                                                 : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
-                                                                    : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512UInt32WithTruncation;
 
                         return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
                     }
@@ -1213,9 +1215,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                     assert(simdBaseType == TYP_DOUBLE);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
-                                                 : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
-                                                                    : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation
+                                             : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation
+                                                                : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
 
                         return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
                     }
@@ -1227,9 +1230,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                     assert(simdBaseType == TYP_FLOAT);
                     if (IsBaselineVector512IsaSupportedOpportunistically())
                     {
-                        NamedIntrinsic intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
-                                                 : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
-                                                                    : NI_AVX512F_ConvertToVector512Int32WithTruncation;
+                        NamedIntrinsic intrinsic =
+                            (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation
+                                             : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation
+                                                                : NI_AVX512F_ConvertToVector512Int32WithTruncation;
 
                         return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
                     }