From 7d764be028c51efc10dafd00013cda1884c212ec Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Tue, 9 May 2023 16:22:00 -0700
Subject: [PATCH 01/40] Fix the JIT_Dbl2ULng helper function. The new AVX512
 instruction vcvtsd2usi returns ulong.MaxValue to signal a floating-point
 exception (FPE) for negative, NaN, and >= ulong.MaxValue + 1 inputs.

---
 src/coreclr/vm/jithelpers.cpp                    |  5 +++++
 .../out_of_range_fp_to_int_conversions.cpp       | 15 ++++-----------
 .../out_of_range_fp_to_int_conversions.cs        | 16 ++--------------
 3 files changed, 11 insertions(+), 25 deletions(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index d4ce2c9aa69ac..2949df0d53284 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -589,7 +589,11 @@ HCIMPLEND
 HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+    const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
+    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
 
+#else
     const double two63  = 2147483648.0 * 4294967296.0;
     UINT64 ret;
     if (val < two63) {
@@ -600,6 +604,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
         ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
     }
     return ret;
+#endif
 }
 HCIMPLEND
 
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index eaf7f2fa1a9da..db690e1160f80 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -124,6 +124,7 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
     if (t == CONVERT_NATIVECOMPILERBEHAVIOR)
         return (uint64_t)x;
 
+    double input_val = x;
     x = trunc(x); // truncate (round toward zero)
 
     // (double)UINT64_MAX cannot be represented exactly as double
@@ -153,18 +154,10 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
                 return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
             }
         }
-
+    
     case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-        if (x < int64_max_plus_1)
-        {
-            return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x;
-        }
-        else
-        {
-            x -= int64_max_plus_1;
-            x = trunc(x);
-            return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
-        }
+        return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val;
+
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index 5b78783c09e4c..49197e7965feb 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -171,6 +171,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
             if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR)
                 return (ulong)x;
 
+            double input_val = x;
             x = Truncate(x); // truncate (round toward zero)
 
             // (double)ULLONG_MAX cannot be represented exactly as double
@@ -199,21 +200,8 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                             return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
                         }
                     }
-
                 case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-
-                    if (x < two63)
-                    {
-                        return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x;
-                    }
-                    else
-                    {
-                        // (double)LLONG_MAX cannot be represented exactly as double
-                        const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1);
-                        x -= two63;
-                        x = Math.Truncate(x);
-                        return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
-                    }
+                    return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val;
             }
 
             return 0;

From f50408b6b9f6bef3b64b2f54922ca377f44bb1ab Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Wed, 10 May 2023 03:20:29 -0700
Subject: [PATCH 02/40] Make the library test cases' expected output depend on
 the architecture. This is needed because the JIT_Dbl2ULng helper function
 now mimics the new IEEE-compliant AVX512 instruction vcvtsd2usi. As a
 result, the library test cases had to be updated, because the default
 floating-point exception (FPE) value of the new instruction differs from
 the default MSVC FPE value, i.e. 0.

---
 .../tests/System/UIntPtrTests.GenericMath.cs    | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
index 2e752a91af21f..117c87db6ce9e 100644
--- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
+++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
@@ -12,6 +12,7 @@ public class UIntPtrTests_GenericMath
         //
         // IAdditionOperators
         //
+        public static Architecture arch = RuntimeInformation.ProcessArchitecture;
 
         [Fact]
         public static void op_AdditionTest()
@@ -2223,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.NaN));
         }
 
         [Fact]
@@ -2244,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest()
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NegativeInfinity));
 
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.MinValue));
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NaN));
         }
 
         [Fact]
@@ -2351,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.NaN));
         }
 
         [Fact]
@@ -2396,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.NaN));
         }
 
         [Fact]
@@ -2535,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.NaN));
         }
 
         [Fact]
@@ -2556,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest()
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NegativeInfinity));
 
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.MinValue));
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NaN));
         }
 
         [Fact]
@@ -2685,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.NaN));
         }
 
         [Fact]
@@ -2741,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.MinValue));
 
-            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.NaN));
+            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.NaN));
         }
 
         [Fact]

From f018095ace63b13fda4fe91f24d3d7913c10fb85 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 12 May 2023 13:32:23 -0700
Subject: [PATCH 03/40] Fix the JIT_Dbl2ULng helper function. Also handle NaN
 in the conversion APIs themselves, instead of changing the library test
 cases, to make sure NaN cases are handled.

---
 src/coreclr/scripts/jitformat.py                 | 12 +++++++-----
 src/coreclr/vm/jithelpers.cpp                    |  3 ++-
 .../System.Private.CoreLib/src/System/Double.cs  |  2 +-
 .../System.Private.CoreLib/src/System/Half.cs    |  2 +-
 .../src/System/Runtime/InteropServices/NFloat.cs |  2 +-
 .../System.Private.CoreLib/src/System/Single.cs  |  2 +-
 .../tests/System/UIntPtrTests.GenericMath.cs     | 16 ++++++++--------
 .../out_of_range_fp_to_int_conversions.cpp       |  6 ++----
 .../out_of_range_fp_to_int_conversions.cs        |  5 ++---
 9 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py
index 51a096c59cd3c..497a5a12290e3 100644
--- a/src/coreclr/scripts/jitformat.py
+++ b/src/coreclr/scripts/jitformat.py
@@ -21,6 +21,7 @@
 import tarfile
 import tempfile
 import zipfile
+import time
 
 class ChangeDir:
     def __init__(self, dir):
@@ -81,7 +82,7 @@ def main(argv):
     args, unknown = parser.parse_known_args(argv)
 
     if unknown:
-        logging.warning('Ignoring argument(s): {}'.format(','.join(unknown)))
+        logging.warn('Ignoring argument(s): {}'.format(','.join(unknown)))
 
     if args.coreclr is None:
         logging.error('Specify --coreclr')
@@ -140,10 +141,11 @@ def main(argv):
         bootstrapPath = os.path.join(temp_location, bootstrapFilename)
 
         assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0
-
-        if not jitutil.download_one_url(bootstrapUrl, bootstrapPath):
-            logging.error("Did not download bootstrap!")
-            return -1
+        print(bootstrapPath)
+        time.sleep(60)
+        # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath):
+        #     logging.error("Did not download bootstrap!")
+        #     return -1
 
         if platform == 'windows':
             # Need to ensure we have Windows line endings on the downloaded script file,
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 2949df0d53284..d1e9193e252a6 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -590,8 +590,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
+
     const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
-    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
+    return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val;
 
 #else
     const double two63  = 2147483648.0 * 4294967296.0;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs
index aaa637ae02a6f..c459a648b44a6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Double.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs
@@ -1400,7 +1400,7 @@ private static bool TryConvertTo<TOther>(double value, [MaybeNullWhen(false)] ou
             {
 #if TARGET_64BIT
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #else
diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs
index 6415acc9c798e..07a7027359487 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Half.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs
@@ -1883,7 +1883,7 @@ private static bool TryConvertTo<TOther>(Half value, [MaybeNullWhen(false)] out
             else if (typeof(TOther) == typeof(nuint))
             {
                 nuint actualResult = (value == PositiveInfinity) ? nuint.MaxValue :
-                                     (value <= Zero) ? nuint.MinValue : (nuint)value;
+                                     (value <= Zero || IsNaN(value)) ? nuint.MinValue : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
             }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
index e5645feb21ffa..d8f35715ff0bf 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
@@ -1754,7 +1754,7 @@ private static bool TryConvertTo<TOther>(NFloat value, [MaybeNullWhen(false)] ou
                 return true;
 #else
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs
index 42d63de43279b..3219e9b27d585 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Single.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs
@@ -1380,7 +1380,7 @@ private static bool TryConvertTo<TOther>(float value, [MaybeNullWhen(false)] out
             {
 #if TARGET_64BIT
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #else
diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
index 117c87db6ce9e..414788a4c4742 100644
--- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
+++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
@@ -2224,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<double>(double.NaN));
         }
 
         [Fact]
@@ -2245,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest()
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NegativeInfinity));
 
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.MinValue));
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<Half>(Half.NaN));
         }
 
         [Fact]
@@ -2352,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<NFloat>(NFloat.NaN));
         }
 
         [Fact]
@@ -2397,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateSaturating<float>(float.NaN));
         }
 
         [Fact]
@@ -2536,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<double>(double.NaN));
         }
 
         [Fact]
@@ -2557,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest()
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NegativeInfinity));
 
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.MinValue));
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<Half>(Half.NaN));
         }
 
         [Fact]
@@ -2686,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<NFloat>(NFloat.NaN));
         }
 
         [Fact]
@@ -2742,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest()
             Assert.Equal(nuint.MaxValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.MaxValue));
             Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.MinValue));
 
-            Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.NaN));
+            Assert.Equal(nuint.MinValue, NumberBaseHelper<nuint>.CreateTruncating<float>(float.NaN));
         }
 
         [Fact]
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index db690e1160f80..3890fcac11a3d 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -124,7 +124,6 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
     if (t == CONVERT_NATIVECOMPILERBEHAVIOR)
         return (uint64_t)x;
 
-    double input_val = x;
     x = trunc(x); // truncate (round toward zero)
 
     // (double)UINT64_MAX cannot be represented exactly as double
@@ -138,6 +137,7 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
         return ((x != x) || (x < INT64_MIN) || (x >= uint64_max_plus_1)) ? (uint64_t)INT64_MIN : (x < 0) ? (uint64_t)(int64_t)x : (uint64_t)x;
 
     case CONVERT_SENTINEL:
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x;
 
     case CONVERT_SATURATING:
@@ -155,9 +155,7 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
             }
         }
     
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-        return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val;
-
+    
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index 49197e7965feb..e2be91c974fec 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -171,7 +171,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
             if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR)
                 return (ulong)x;
 
-            double input_val = x;
             x = Truncate(x); // truncate (round toward zero)
 
             // (double)ULLONG_MAX cannot be represented exactly as double
@@ -184,6 +183,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= ullong_max_plus_1)) ? unchecked((ulong)long.MinValue): (x < 0) ? (ulong)(long)x: (ulong)x;
 
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                     return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)x;
 
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
@@ -200,8 +200,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                             return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
                         }
                     }
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-                    return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val;
+                
             }
 
             return 0;

From ffe97cd63fb098f8a38cf3de9218fe0feddc6303 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 12 May 2023 13:33:26 -0700
Subject: [PATCH 04/40] Revert the jitformat.py bootstrap-download changes

---
 src/coreclr/scripts/jitformat.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py
index 497a5a12290e3..ad63529fa7580 100644
--- a/src/coreclr/scripts/jitformat.py
+++ b/src/coreclr/scripts/jitformat.py
@@ -21,7 +21,6 @@
 import tarfile
 import tempfile
 import zipfile
-import time
 
 class ChangeDir:
     def __init__(self, dir):
@@ -141,11 +140,10 @@ def main(argv):
         bootstrapPath = os.path.join(temp_location, bootstrapFilename)
 
         assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0
-        print(bootstrapPath)
-        time.sleep(60)
-        # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath):
-        #     logging.error("Did not download bootstrap!")
-        #     return -1
+
+        if not jitutil.download_one_url(bootstrapUrl, bootstrapPath):
+            logging.error("Did not download bootstrap!")
+            return -1
 
         if platform == 'windows':
             # Need to ensure we have Windows line endings on the downloaded script file,

From a8ee861013973bf7141233602b677740550bae70 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Mon, 15 May 2023 16:31:05 -0700
Subject: [PATCH 05/40] Add a truncate function for the JIT_Dbl2ULng helper so
 that inputs in the range (-1, 0) need not be handled as a separate edge case
 inside the helper.

---
 src/coreclr/vm/jithelpers.cpp | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index d1e9193e252a6..1d9dbb77cc2de 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -572,6 +572,30 @@ FORCEINLINE INT64 FastDbl2Lng(double val)
 #endif
 }
 
+/*********************************************************************/
+// helper function to truncate double numbers to nearest integer (round towards zero)
+double TrucateDouble(double val)
+{
+    FCALL_CONTRACT;
+    int64_t *dintVal = (int64_t *)&val;
+
+    uint64_t uintVal = (uint64_t)*dintVal;
+    int exponent = (int)((uintVal >> 52) & 0x7FF);
+    if (exponent < 1023)
+    {
+        uintVal = uintVal & 0x8000000000000000ull;
+    }
+    else if (exponent < 1075)
+    {
+        uintVal = uintVal &  (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023)));
+    }
+    int64_t intVal = (int64_t)uintVal;
+    double *doubleVal = (double *)&intVal;
+    double retVal = *doubleVal;
+
+    return retVal;
+}
+
 /*********************************************************************/
 HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val)
 {
@@ -592,7 +616,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
     const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
-    return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val;
+    val = TrucateDouble(val);
+    //return ((val != val) || ((val < 0) && (val + 1 < 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val;
+    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
 
 #else
     const double two63  = 2147483648.0 * 4294967296.0;

From bbd8a8b6e3d0564996b310be63b216dbbf0deb3e Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Tue, 16 May 2023 00:24:42 -0700
Subject: [PATCH 06/40] Add code to handle vectorized conversions for
 float/double to/from ulong/uint

---
 src/coreclr/jit/codegenxarch.cpp | 20 +++++++++++++++++---
 src/coreclr/jit/emitxarch.cpp    | 12 ++++++++++--
 src/coreclr/jit/instr.cpp        | 16 ++++++++++++++++
 src/coreclr/jit/lowerxarch.cpp   |  8 ++++----
 src/coreclr/jit/morph.cpp        |  4 ++++
 5 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 75d8e5432c4ae..c9c5677e1e796 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7338,6 +7338,18 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     noway_assert(srcType != TYP_UINT);
     noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
 
+    if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+    {
+        if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT))
+        {
+            genConsumeOperands(treeNode->AsOp());
+            instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
+            GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
+            genProduceReg(treeNode);
+            return;
+        }
+    }
+
     // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
     // which does a partial write to lower 4/8 bytes of xmm register keeping the other
     // upper bytes unmodified.  If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
@@ -7449,8 +7461,10 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
     // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase.
-    noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+    // into a helper call by either front-end or lowering phase, unless we have AVX512F
+    // accelerated conversions.
+    noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
+                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
@@ -7463,7 +7477,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
     genConsumeOperands(treeNode->AsOp());
-    instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
+    instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
     GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
     genProduceReg(treeNode);
 }
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 13307006a6db3..088d8ba03a49f 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -18595,15 +18595,23 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_cvtsi2sd64:
         case INS_cvtsi2ss64:
         case INS_vcvtsd2usi:
-        case INS_vcvttsd2usi:
         case INS_vcvtusi2sd32:
-        case INS_vcvtusi2sd64:
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
             break;
 
+        case INS_vcvttsd2usi:
+            result.insLatency += PERFSCORE_LATENCY_6C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case INS_vcvtusi2sd64:
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency += PERFSCORE_LATENCY_5C;
+            break;
+
         case INS_cvttss2si:
         case INS_cvtss2si:
         case INS_vcvtss2usi:
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 93c4e601bb781..82f8166d4e81d 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2281,6 +2281,9 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
 instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
 {
     // AVX: For now we support only conversion from Int/Long -> float
+    // AVX512: Supports following conversions
+    //   srcType = float/double                    castToType = ulong
+    //   srcType = ulong                           castToType = double
 
     switch (from)
     {
@@ -2329,6 +2332,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
                     return ins_Move_Extend(TYP_FLOAT, false);
                 case TYP_DOUBLE:
                     return INS_cvtss2sd;
+                case TYP_ULONG:
+                    return INS_vcvttss2usi;
                 default:
                     unreached();
             }
@@ -2341,6 +2346,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
                     return INS_cvttsd2si;
                 case TYP_LONG:
                     return INS_cvttsd2si;
+                case TYP_ULONG:
+                    return INS_vcvttsd2usi;
                 case TYP_FLOAT:
                     return INS_cvtsd2ss;
                 case TYP_DOUBLE:
@@ -2350,6 +2357,15 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             }
             break;
 
+        case TYP_ULONG:
+            switch (to)
+            {
+                case TYP_DOUBLE:
+                    return INS_vcvtusi2sd64;
+                default:
+                    unreached();
+            }
+
         default:
             unreached();
     }
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index aa4258d71ba77..bba730cc5faee 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree)
     //   srcType = float/double                    castToType = * and overflow detecting cast
     //       Reason: must be converted to a helper call
     //   srcType = float/double,                   castToType = ulong
-    //       Reason: must be converted to a helper call
+    //       Reason: must be converted to a helper call unless we have AVX512F
     //   srcType = uint                            castToType = float/double
     //       Reason: uint -> float/double = uint -> long -> float/double
     //   srcType = ulong                           castToType = float
     //       Reason: ulong -> float = ulong -> double -> float
-    if (varTypeIsFloating(srcType))
+    if (srcType == TYP_FLOAT)
     {
-        noway_assert(!tree->gtOverflow());
-        noway_assert(castToType != TYP_ULONG);
+        noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
     }
     else if (srcType == TYP_UINT)
     {
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 8e4c6612b41a1..400ca8e48a34c 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -357,6 +357,10 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #endif // !TARGET_AMD64
 
                     case TYP_ULONG:
+#ifdef TARGET_AMD64
+                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+                            return nullptr;
+#endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                     default:
                         unreached();

From a21a0775a257bc693b0cebf8bb6591d5344260f5 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Tue, 16 May 2023 11:45:15 -0700
Subject: [PATCH 07/40] reverting changes for float to ulong

---
 src/coreclr/jit/morph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 400ca8e48a34c..ead3703537ca4 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -358,7 +358,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 
                     case TYP_ULONG:
 #ifdef TARGET_AMD64
-                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT)
                             return nullptr;
 #endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);

From 1e3415ab924b641373e03413f6b15a9a389a9e27 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Tue, 16 May 2023 14:28:24 -0700
Subject: [PATCH 08/40] enabling float to ulong conversion

---
 src/coreclr/jit/morph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index ead3703537ca4..400ca8e48a34c 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -358,7 +358,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 
                     case TYP_ULONG:
 #ifdef TARGET_AMD64
-                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT)
+                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
                             return nullptr;
 #endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);

From c788c67f0d97526bddf4effa617ee30eb65cee1a Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Wed, 17 May 2023 00:35:51 -0700
Subject: [PATCH 09/40] Making change to set w1 bit for evex

---
 src/coreclr/jit/instrsxarch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 161df4485e0d9..382896ffe61ab 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -633,7 +633,7 @@ INST3(vcmpsd,           "cmpsd",            IUM_WR, BAD_CODE,               BAD_
 INST3(vcvtpd2udq,       "cvtpd2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to unsigned DWORDs
 INST3(vcvtps2udq,       "cvtps2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed singles to unsigned DWORDs
 INST3(vcvtsd2usi,       "cvtsd2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar double to unsigned DWORD/QWORD
-INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
+INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
 INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
 INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
 INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD

From fbb2a90f8b87aed086005b68fd4fa05bf5e4efae Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Thu, 18 May 2023 00:25:58 -0700
Subject: [PATCH 10/40] Merging with main. Picking up hwintrinsiclistxarch
 from main. Trying to return EA_4BYTE for INS_vcvttss2usi to make sure that
 we read dword and not qword for float to ulong

---
 src/coreclr/jit/emit.h        |  6 ++++++
 src/coreclr/jit/emitxarch.cpp | 27 +++++++++++++--------------
 src/coreclr/jit/instr.cpp     |  2 +-
 src/coreclr/jit/instrsxarch.h |  3 ++-
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 9cab8e6fcea2f..5bbaa389df189 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3891,6 +3891,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
             return EA_32BYTE;
         }
 
+        case INS_vcvttss2usi64:
+        case INS_vcvttss2usi32:
+        {
+            return EA_4BYTE;
+        }
+        
         case INS_movddup:
         {
             if (defaultSize == 64)
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 088d8ba03a49f..ef6a90c7113fa 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1399,17 +1399,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
             case INS_vcvtsd2usi:
             case INS_vcvtss2usi:
             case INS_vcvttsd2usi:
-            case INS_vcvttss2usi:
-            {
-                if (attr == EA_8BYTE)
-                {
-                    return true;
-                }
-
-                // TODO-Cleanup: This should really only ever be EA_4BYTE
-                assert((attr == EA_4BYTE) || (attr == EA_16BYTE));
-                return false;
-            }
 
             case INS_vbroadcastsd:
             case INS_vpbroadcastq:
@@ -2623,7 +2612,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
         case INS_vcvtsd2usi:
         case INS_vcvtss2usi:
         case INS_vcvttsd2usi:
-        case INS_vcvttss2usi:
+        case INS_vcvttss2usi32:
+        case INS_vcvttss2usi64:
         {
             // These SSE instructions write to a general purpose integer register.
             return false;
@@ -11435,7 +11425,7 @@ void emitter::emitDispIns(
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
                 case INS_vcvttsd2usi:
-                case INS_vcvttss2usi:
+                //case INS_vcvttss2usi:
                 {
                     printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
                     break;
@@ -18615,10 +18605,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_cvttss2si:
         case INS_cvtss2si:
         case INS_vcvtss2usi:
-        case INS_vcvttss2usi:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C;
             break;
+        
+        case INS_vcvttss2usi32:
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency += PERFSCORE_LATENCY_7C;
+            break;
+        
+        case INS_vcvttss2usi64:
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency += PERFSCORE_LATENCY_8C;
+            break;
 
         case INS_cvtss2sd:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 82f8166d4e81d..cdeca11b327ee 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2333,7 +2333,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
                 case TYP_DOUBLE:
                     return INS_cvtss2sd;
                 case TYP_ULONG:
-                    return INS_vcvttss2usi;
+                    return INS_vcvttss2usi64;
                 default:
                     unreached();
             }
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 382896ffe61ab..0f16b1c87297f 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -637,7 +637,8 @@ INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_
 INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
 INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
 INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD
-INST3(vcvttss2usi,      "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
+INST3(vcvttss2usi32,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
+INST3(vcvttss2usi64,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
 INST3(vcvtudq2pd,       "cvtudq2pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7A),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to doubles
 INST3(vcvtudq2ps,       "cvtudq2ps",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7A),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to singles
 INST3(vcvtusi2sd32,     "cvtusi2sd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7B),                  INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar unsigned DWORD to double

From 9fece01dd61191f06df0c91f42910ba643551b9f Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Thu, 18 May 2023 01:16:12 -0700
Subject: [PATCH 11/40] jit format

---
 src/coreclr/jit/emit.h        |  2 +-
 src/coreclr/jit/emitxarch.cpp | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 5bbaa389df189..5fbc31bb116c4 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3896,7 +3896,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
         {
             return EA_4BYTE;
         }
-        
+
         case INS_movddup:
         {
             if (defaultSize == 64)
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index ef6a90c7113fa..bcebaa6e89bed 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -11425,11 +11425,11 @@ void emitter::emitDispIns(
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
                 case INS_vcvttsd2usi:
-                //case INS_vcvttss2usi:
-                {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-                    break;
-                }
+                    // case INS_vcvttss2usi:
+                    {
+                        printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
+                        break;
+                    }
 
 #ifdef TARGET_AMD64
                 case INS_movsxd:
@@ -18608,12 +18608,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C;
             break;
-        
+
         case INS_vcvttss2usi32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
             break;
-        
+
         case INS_vcvttss2usi64:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_8C;

From b40cd8ed897600b1cf29e2e1292f267ffdd0562d Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Thu, 18 May 2023 14:00:03 -0700
Subject: [PATCH 12/40] Splitting vcvttss2usi to vcvttss2usi32 and
 vcvttss2usi64. Also adding a special handling for vcvttss2usi64 to make sure
 we read only dword instead of qword for float to ulong conversion

---
 src/coreclr/jit/emit.h        |  7 +++++--
 src/coreclr/jit/emitxarch.cpp | 26 +++++++++++++++++++++-----
 src/coreclr/jit/instrsxarch.h |  2 +-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 5fbc31bb116c4..2b08bb8c03d08 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3892,9 +3892,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
         }
 
         case INS_vcvttss2usi64:
-        case INS_vcvttss2usi32:
         {
-            return EA_4BYTE;
+            if (defaultSize == 8)
+            {
+                return EA_4BYTE;
+            }
+            return defaultSize;
         }
 
         case INS_movddup:
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index bcebaa6e89bed..937ea7a7fcc55 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1399,6 +1399,16 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
             case INS_vcvtsd2usi:
             case INS_vcvtss2usi:
             case INS_vcvttsd2usi:
+            {
+                if (attr == EA_8BYTE)
+                {
+                    return true;
+                }
+
+                // TODO-Cleanup: This should really only ever be EA_4BYTE
+                assert((attr == EA_4BYTE) || (attr == EA_16BYTE));
+                return false;
+            }
 
             case INS_vbroadcastsd:
             case INS_vpbroadcastq:
@@ -11425,11 +11435,17 @@ void emitter::emitDispIns(
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
                 case INS_vcvttsd2usi:
-                    // case INS_vcvttss2usi:
-                    {
-                        printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-                        break;
-                    }
+                {
+                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
+                    break;
+                }
+
+                case INS_vcvttss2usi32:
+                case INS_vcvttss2usi64:
+                {
+                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
+                    break;
+                }
 
 #ifdef TARGET_AMD64
                 case INS_movsxd:
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 0f16b1c87297f..add7a79abbde6 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -633,7 +633,7 @@ INST3(vcmpsd,           "cmpsd",            IUM_WR, BAD_CODE,               BAD_
 INST3(vcvtpd2udq,       "cvtpd2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to unsigned DWORDs
 INST3(vcvtps2udq,       "cvtps2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed singles to unsigned DWORDs
 INST3(vcvtsd2usi,       "cvtsd2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar double to unsigned DWORD/QWORD
-INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
+INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
 INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
 INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
 INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD

From 710026eab3381ba459b358ab2fae5cb638b7e159 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Thu, 18 May 2023 16:42:40 -0700
Subject: [PATCH 13/40] undoing jitformat changes due to merge error

---
 src/coreclr/scripts/jitformat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py
index ad63529fa7580..51a096c59cd3c 100644
--- a/src/coreclr/scripts/jitformat.py
+++ b/src/coreclr/scripts/jitformat.py
@@ -81,7 +81,7 @@ def main(argv):
     args, unknown = parser.parse_known_args(argv)
 
     if unknown:
-        logging.warn('Ignoring argument(s): {}'.format(','.join(unknown)))
+        logging.warning('Ignoring argument(s): {}'.format(','.join(unknown)))
 
     if args.coreclr is None:
         logging.error('Specify --coreclr')

From 75e6acfc43e6c0e38f0ed4c3440523fef847d3da Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 19 May 2023 15:03:03 -0700
Subject: [PATCH 14/40] removing unused code and correcting throughput and
 latency information for vcvttsd2usi, vcvtusi2sd32/64

---
 src/coreclr/jit/emitxarch.cpp                             | 8 ++------
 src/coreclr/vm/jithelpers.cpp                             | 1 -
 .../tests/System/UIntPtrTests.GenericMath.cs              | 1 -
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 937ea7a7fcc55..cc6816af87099 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -18601,19 +18601,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_cvtsi2sd64:
         case INS_cvtsi2ss64:
         case INS_vcvtsd2usi:
-        case INS_vcvtusi2sd32:
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency += PERFSCORE_LATENCY_7C;
-            break;
-
         case INS_vcvttsd2usi:
-            result.insLatency += PERFSCORE_LATENCY_6C;
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency += PERFSCORE_LATENCY_7C;
             break;
 
         case INS_vcvtusi2sd64:
+        case INS_vcvtusi2sd32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_5C;
             break;
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 1d9dbb77cc2de..78db022d9ea03 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -617,7 +617,6 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 
     const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
     val = TrucateDouble(val);
-    //return ((val != val) || ((val < 0) && (val + 1 < 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val;
     return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
 
 #else
diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
index 414788a4c4742..2e752a91af21f 100644
--- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
+++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs
@@ -12,7 +12,6 @@ public class UIntPtrTests_GenericMath
         //
         // IAdditionOperators
         //
-        public static Architecture arch = RuntimeInformation.ProcessArchitecture;
 
         [Fact]
         public static void op_AdditionTest()

From e15be4b072f18cc4d8b9554e7bb1de0fb407dedb Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 19 May 2023 15:04:39 -0700
Subject: [PATCH 15/40] correcting throughput and latency for vcvttss2usi32 and
 placing it with other similar instructions

---
 src/coreclr/jit/emitxarch.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index cc6816af87099..9f9ffdc5a614e 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -18604,6 +18604,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
         case INS_vcvttsd2usi:
+        case INS_vcvttss2usi32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
             break;
@@ -18621,11 +18622,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C;
             break;
 
-        case INS_vcvttss2usi32:
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency += PERFSCORE_LATENCY_7C;
-            break;
-
         case INS_vcvttss2usi64:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_8C;

From 10e28769b60ceadf3500ed14299cf21b6247f209 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 19 May 2023 16:11:52 -0700
Subject: [PATCH 16/40] formatting

---
 src/coreclr/vm/jithelpers.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 78db022d9ea03..54e2cb7de63ca 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -572,9 +572,17 @@ FORCEINLINE INT64 FastDbl2Lng(double val)
 #endif
 }
 
-/*********************************************************************/
-// helper function to truncate double numbers to nearest integer (round towards zero)
-double TrucateDouble(double val)
+//------------------------------------------------------------------------
+// TruncateDouble: helper function to truncate double 
+//                 numbers to nearest integer (round towards zero).
+//
+// Arguments:
+//    val  - double number to be truncated.
+//
+// Return Value:
+//    truncated number (rounded towards zero)
+// 
+double TruncateDouble(double val)
 {
     FCALL_CONTRACT;
     int64_t *dintVal = (int64_t *)&val;
@@ -616,7 +624,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
     const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
-    val = TrucateDouble(val);
+    val = TruncateDouble(val);
     return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
 
 #else

From 9463173664bb823cc8f130ba92f0dda79efbc993 Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Fri, 19 May 2023 16:13:32 -0700
Subject: [PATCH 17/40] formatting

---
 src/coreclr/vm/jithelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 54e2cb7de63ca..c2aeefc0e93dc 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -638,7 +638,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
         ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
     }
     return ret;
-#endif
+#endif // TARGET_X86 || TARGET_AMD64
 }
 HCIMPLEND
 

From 4f7bb670ed06916a279c82ee0d4f39aec737591b Mon Sep 17 00:00:00 2001
From: Khushal Modi <khushal.chandresh.modi@intel.com>
Date: Sun, 21 May 2023 22:02:05 -0700
Subject: [PATCH 18/40] updating comments

---
 src/coreclr/vm/jithelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index c2aeefc0e93dc..476ca84d90ee0 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -580,7 +580,7 @@ FORCEINLINE INT64 FastDbl2Lng(double val)
 //    val  - double number to be truncated.
 //
 // Return Value:
-//    truncated number (rounded towards zero)
+//    double: truncated number (rounded towards zero)
 // 
 double TruncateDouble(double val)
 {

From a99725c6dc67dd035489359a0b71649939fb52b4 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 24 May 2023 11:41:05 -0700
Subject: [PATCH 19/40] Updating code for GitHub comments. Using
 compIsaSupportedDebugOnly for noway_asserts and also checking for both float
 and double in LowerCast for overflow and conversion to ulong

---
 src/coreclr/jit/codegenxarch.cpp | 2 +-
 src/coreclr/jit/emit.h           | 6 +-----
 src/coreclr/jit/lowerxarch.cpp   | 6 +++---
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index c9c5677e1e796..f958ed60b8064 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
     noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+                 compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 2b08bb8c03d08..e2b3b350963b5 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3893,11 +3893,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
 
         case INS_vcvttss2usi64:
         {
-            if (defaultSize == 8)
-            {
-                return EA_4BYTE;
-            }
-            return defaultSize;
+            return EA_4BYTE;
         }
 
         case INS_movddup:
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index bba730cc5faee..1508a4f5962df 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -800,10 +800,10 @@ void Lowering::LowerCast(GenTree* tree)
     //       Reason: uint -> float/double = uint -> long -> float/double
     //   srcType = ulong                           castToType = float
     //       Reason: ulong -> float = ulong -> double -> float
-    if (srcType == TYP_FLOAT)
+    if (varTypeIsFloating(srcType))
     {
-        noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
-        noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        noway_assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        noway_assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
     }
     else if (srcType == TYP_UINT)
     {

From 44390b22c64d36e8b3b14bb958d62a8ae3f2ca47 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 24 May 2023 13:07:53 -0700
Subject: [PATCH 20/40] reverting the compIsaSupportedDebugOnly checks to the
 original compOpportunisticallyDependsOn checks because the former is not
 available in release mode

---
 src/coreclr/jit/codegenxarch.cpp | 4 ++--
 src/coreclr/jit/lowerxarch.cpp   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index f958ed60b8064..5ea1360ae3c7c 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7349,7 +7349,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
             return;
         }
     }
-
+    
     // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
     // which does a partial write to lower 4/8 bytes of xmm register keeping the other
     // upper bytes unmodified.  If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
@@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
     noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-                 compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+                    compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 1508a4f5962df..d049ab74c52dd 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree)
     //       Reason: ulong -> float = ulong -> double -> float
     if (varTypeIsFloating(srcType))
     {
-        noway_assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
-        noway_assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
     }
     else if (srcType == TYP_UINT)
     {

From 2f20ef35cc5e1ab78b3612f8fadb8f449bea46f4 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 24 May 2023 16:39:08 -0700
Subject: [PATCH 21/40] running jitformat

---
 src/coreclr/jit/codegenxarch.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 5ea1360ae3c7c..f958ed60b8064 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7349,7 +7349,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
             return;
         }
     }
-    
+
     // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
     // which does a partial write to lower 4/8 bytes of xmm register keeping the other
     // upper bytes unmodified.  If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
@@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
     noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-                    compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+                 compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.

From b7dff8aaca98b5bea175a30674ddcefe6bd6d451 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 24 May 2023 20:55:58 -0700
Subject: [PATCH 22/40] running jitformat

---
 src/coreclr/jit/codegenxarch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index f958ed60b8064..c9c5677e1e796 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
     noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-                 compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.

From 9622f78ba3b32ead44ffd5dd429e8fe01befc189 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 16 Jun 2023 20:34:35 -0700
Subject: [PATCH 23/40] combine the 2 nodes GT_CAST(GT_CAST(TYP_ULONG,
 TYP_DOUBLE), TYP_FLOAT) into a single node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT)

---
 src/coreclr/jit/codegenxarch.cpp |  3 ++-
 src/coreclr/jit/importer.cpp     | 10 +++++++++
 src/coreclr/jit/instr.cpp        |  2 ++
 src/coreclr/jit/lowerxarch.cpp   |  2 +-
 src/coreclr/jit/morph.cpp        | 37 +++++++++++++++++++++++++++++++-
 5 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index c9c5677e1e796..a289b49e9b5e3 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7336,7 +7336,8 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
     // here since they should have been lowered appropriately.
     noway_assert(srcType != TYP_UINT);
-    noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
+    noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
+                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index de914ea0bdfdc..4aea31a14a762 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -7883,6 +7883,16 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                                || (impStackTop().val->TypeGet() == TYP_BYREF)
 #endif
                         ;
+#ifdef TARGET_AMD64
+                    // If AVX512 is present and we are not checking for overflow, we do not need
+                    // a large node. In this case, we will not fallback to a helper function but
+                    // will use the intrinsic instead. Hence setting the callNode to false to
+                    // avoid generating a large node.
+                    if (callNode && compOpportunisticallyDependsOn(InstructionSet_AVX512F) && !ovfl)
+                    {
+                        callNode = false;
+                    }
+#endif // TARGET_AMD64
                 }
                 else
                 {
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index cdeca11b327ee..43cd4ce2ddc17 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2362,6 +2362,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             {
                 case TYP_DOUBLE:
                     return INS_vcvtusi2sd64;
+                case TYP_FLOAT:
+                    return INS_vcvtusi2ss64;
                 default:
                     unreached();
             }
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index d049ab74c52dd..4e9caf1783830 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else if (srcType == TYP_ULONG)
     {
-        noway_assert(castToType != TYP_FLOAT);
+        noway_assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
     }
 
     // Case of src is a small type and dst is a floating point type.
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 400ca8e48a34c..9822a7ea44fd6 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -293,6 +293,41 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
     var_types dstType = tree->CastToType();
     unsigned  dstSize = genTypeSize(dstType);
 
+#if defined(TARGET_AMD64)
+    // If AVX512 is present, we have intrinsic available to convert
+    // ulong directly to float. Hence, we need to combine the 2 nodes
+    // GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single
+    // node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT). At this point, we already
+    // have the 2 GT_CAST nodes in the tree and we are combining them below.
+    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+    {
+        if (oper->OperIs(GT_CAST))
+        {
+            GenTreeCast* innerCast = static_cast<GenTreeCast*>(oper);
+
+            if (innerCast->IsUnsigned())
+            {
+                GenTree*  innerOper    = innerCast->CastOp();
+                var_types innerSrcType = genActualType(innerOper);
+                var_types innerDstType = innerCast->CastToType();
+                unsigned  innerDstSize = genTypeSize(innerDstType);
+                innerSrcType           = varTypeToUnsigned(innerSrcType);
+
+                if (innerSrcType == TYP_ULONG)
+                {
+                    if (dstType == TYP_FLOAT && innerDstType == TYP_DOUBLE)
+                    {
+                        // One optimized cast here
+                        tree         = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT);
+                        tree->gtType = TYP_FLOAT;
+                        return fgMorphTree(tree);
+                    }
+                }
+            }
+        }
+    }
+#endif
+
     // See if the cast has to be done in two steps.  R -> I
     if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
     {
@@ -453,7 +488,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
     {
         srcType = varTypeToUnsigned(srcType);
 
-        if (srcType == TYP_ULONG)
+        if (srcType == TYP_ULONG && !compOpportunisticallyDependsOn(InstructionSet_AVX512F))
         {
             if (dstType == TYP_FLOAT)
             {

From d3b542f1a93b7d315fc91d16ac238a35ec4dfc8d Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Sun, 18 Jun 2023 16:27:56 -0700
Subject: [PATCH 24/40] merging with main and updating hwintrinsiclistxarch to
 take into consideration the 32-bit and 64-bit versions of vcvttss2usi.

---
 src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index e1649b2159c55..a11d80e02f958 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -845,7 +845,7 @@ HARDWARE_INTRINSIC(AVX512F,         CompareNotEqual,
 HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2sd32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2ss32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32,                            16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi,        INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi32,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Byte,                     64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovdb,            INS_vpmovdb,            INS_vpmovqb,            INS_vpmovqb,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128ByteWithSaturation,       64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusdb,          INS_invalid,            INS_vpmovusqb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Int16,                    64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqw,            INS_vpmovqw,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
@@ -1002,7 +1002,7 @@ HARDWARE_INTRINSIC(AVX512F_VL,      TernaryLogic,
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2sd64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2ss64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64,                            16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi,        INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi64,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg  EncodesExtraTypeArg                                                                                                       Instructions                                                                                                                  Category                            Flags

From 8343e18b20213333adb210e24dc023c4bd854180 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Sun, 18 Jun 2023 20:06:51 -0700
Subject: [PATCH 25/40] Changing noway_assert to assert to make sure
 compOpportunisticallyDependsOn only runs in debug mode.

---
 src/coreclr/jit/codegenxarch.cpp | 8 ++++----
 src/coreclr/jit/lowerxarch.cpp   | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index a289b49e9b5e3..7d4463d653d3b 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7336,8 +7336,8 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
     // here since they should have been lowered appropriately.
     noway_assert(srcType != TYP_UINT);
-    noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
-                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+    assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
+            compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
@@ -7464,8 +7464,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // We shouldn't be seeing uint64 here as it should have been converted
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
-    noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-                 compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
+           compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 4e9caf1783830..e10306c2a78dd 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree)
     //       Reason: ulong -> float = ulong -> double -> float
     if (varTypeIsFloating(srcType))
     {
-        noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
-        noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
     }
     else if (srcType == TYP_UINT)
     {
@@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else if (srcType == TYP_ULONG)
     {
-        noway_assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
     }
 
     // Case of src is a small type and dst is a floating point type.

From e4567633d1b269116a29d3e96cc2cdc21954286b Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Sun, 18 Jun 2023 20:25:12 -0700
Subject: [PATCH 26/40] running jitformat

---
 src/coreclr/jit/codegenxarch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 7d4463d653d3b..dc9a09b7d17a8 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7337,7 +7337,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     // here since they should have been lowered appropriately.
     noway_assert(srcType != TYP_UINT);
     assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
-            compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+           compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
 
     if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {

From fdb28c6520e14fbd0db5a2b46cc5007183f21b45 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Tue, 20 Jun 2023 09:38:48 -0700
Subject: [PATCH 27/40] Changing compOpportunisticallyDependsOn to
 compIsaSupportedDebugOnly in asserts aka code review changes

---
 src/coreclr/jit/codegenxarch.cpp | 4 ++--
 src/coreclr/jit/lowerxarch.cpp   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index dc9a09b7d17a8..898c69ec4cf6a 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7337,7 +7337,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     // here since they should have been lowered appropriately.
     noway_assert(srcType != TYP_UINT);
     assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
-           compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
@@ -7465,7 +7465,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // into a helper call by either front-end or lowering phase, unless we have AVX512F
     // accelerated conversions.
     assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-           compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index e10306c2a78dd..94632d1411e79 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree)
     //       Reason: ulong -> float = ulong -> double -> float
     if (varTypeIsFloating(srcType))
     {
-        assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
-        assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
     }
     else if (srcType == TYP_UINT)
     {
@@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else if (srcType == TYP_ULONG)
     {
-        assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
+        assert(castToType != TYP_FLOAT || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
     }
 
     // Case of src is a small type and dst is a floating point type.

From e9ff179f92548bf28a863c1eba466d3133fb2caf Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Thu, 22 Jun 2023 12:58:11 -0700
Subject: [PATCH 28/40] Making code review changes. Moving around the
 compOpportunisticallyDependsOn checks to make sure they are run only if we
 need AVX512. These checks being costly, moving them to the innermost checks
 in nested if checks.

---
 src/coreclr/jit/codegenxarch.cpp | 17 +++++++--------
 src/coreclr/jit/importer.cpp     |  6 ++++--
 src/coreclr/jit/morph.cpp        | 36 +++++++++++++++-----------------
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 898c69ec4cf6a..7789c25074863 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7339,16 +7339,15 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
            compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
-    if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+    if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) &&
+        compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
-        if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT))
-        {
-            genConsumeOperands(treeNode->AsOp());
-            instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
-            GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
-            genProduceReg(treeNode);
-            return;
-        }
+        assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        genConsumeOperands(treeNode->AsOp());
+        instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
+        GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
+        genProduceReg(treeNode);
+        return;
     }
 
     // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 4aea31a14a762..9c38a69d6854d 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -7886,9 +7886,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 #ifdef TARGET_AMD64
                     // If AVX512 is present and we are not checking for overflow, we do not need
                     // a large node. In this case, we will not fallback to a helper function but
-                    // will use the intrinsic instead. Hence setting the callNode to false to
+                    // will use the intrinsic instead. This is done for all long/ulong to floating
+                    // point conversions. Hence setting the callNode to false to
                     // avoid generating a large node.
-                    if (callNode && compOpportunisticallyDependsOn(InstructionSet_AVX512F) && !ovfl)
+                    if (callNode && !ovfl && varTypeIsLong(impStackTop().val) &&
+                        compOpportunisticallyDependsOn(InstructionSet_AVX512F))
                     {
                         callNode = false;
                     }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 9822a7ea44fd6..dd63ecca7494d 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -299,34 +299,32 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
     // GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single
     // node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT). At this point, we already
     // have the 2 GT_CAST nodes in the tree and we are combining them below.
-    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+    if (oper->OperIs(GT_CAST))
     {
-        if (oper->OperIs(GT_CAST))
+        GenTreeCast* innerCast = static_cast<GenTreeCast*>(oper);
+
+        if (innerCast->IsUnsigned())
         {
-            GenTreeCast* innerCast = static_cast<GenTreeCast*>(oper);
+            GenTree*  innerOper    = innerCast->CastOp();
+            var_types innerSrcType = genActualType(innerOper);
+            var_types innerDstType = innerCast->CastToType();
+            unsigned  innerDstSize = genTypeSize(innerDstType);
+            innerSrcType           = varTypeToUnsigned(innerSrcType);
 
-            if (innerCast->IsUnsigned())
+            // Check if we are going from ulong->double->float
+            if (innerSrcType == TYP_ULONG && innerDstType == TYP_DOUBLE && dstType == TYP_FLOAT)
             {
-                GenTree*  innerOper    = innerCast->CastOp();
-                var_types innerSrcType = genActualType(innerOper);
-                var_types innerDstType = innerCast->CastToType();
-                unsigned  innerDstSize = genTypeSize(innerDstType);
-                innerSrcType           = varTypeToUnsigned(innerSrcType);
-
-                if (innerSrcType == TYP_ULONG)
+                if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
                 {
-                    if (dstType == TYP_FLOAT && innerDstType == TYP_DOUBLE)
-                    {
-                        // One optimized cast here
-                        tree         = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT);
-                        tree->gtType = TYP_FLOAT;
-                        return fgMorphTree(tree);
-                    }
+                    // One optimized (combined) cast here
+                    tree         = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT);
+                    tree->gtType = TYP_FLOAT;
+                    return fgMorphTree(tree);
                 }
             }
         }
     }
-#endif
+#endif // TARGET_AMD64
 
     // See if the cast has to be done in two steps.  R -> I
     if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))

From db2a0cb242f12f59ad45c7fc8ad25727dea1c3aa Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 23 Jun 2023 10:49:48 -0700
Subject: [PATCH 29/40] FCALL_CONTRACT should be only used on FCalls itself

---
 src/coreclr/vm/jithelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 476ca84d90ee0..3e9466efa42ed 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -584,7 +584,7 @@ FORCEINLINE INT64 FastDbl2Lng(double val)
 // 
 double TruncateDouble(double val)
 {
-    FCALL_CONTRACT;
+    LIMITED_METHOD_CONTRACT;
     int64_t *dintVal = (int64_t *)&val;
 
     uint64_t uintVal = (uint64_t)*dintVal;

From 167b563f07a19480f438e17c4767f522d8598342 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 23 Jun 2023 13:13:06 -0700
Subject: [PATCH 30/40] Making parallel changes to JITHelper in MathHelper for
 native AOT

---
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index 9ad553ce15647..cf554efd56308 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -18,8 +18,46 @@ FORCEINLINE int64_t FastDbl2Lng(double val)
 #endif
 }
 
+//------------------------------------------------------------------------
+// TruncateDouble: helper function to truncate double 
+//                 numbers to nearest integer (round towards zero).
+//
+// Arguments:
+//    val  - double number to be truncated.
+//
+// Return Value:
+//    double: truncated number (rounded towards zero)
+// 
+double TruncateDouble(double val)
+{
+    int64_t *dintVal = (int64_t *)&val;
+
+    uint64_t uintVal = (uint64_t)*dintVal;
+    int exponent = (int)((uintVal >> 52) & 0x7FF);
+    if (exponent < 1023)
+    {
+        uintVal = uintVal & 0x8000000000000000ull;
+    }
+    else if (exponent < 1075)
+    {
+        uintVal = uintVal &  (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023)));
+    }
+    int64_t intVal = (int64_t)uintVal;
+    double *doubleVal = (double *)&intVal;
+    double retVal = *doubleVal;
+
+    return retVal;
+}
+
 EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
 {
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+
+    const double uint64_max_plus_1 = -2.0 * (double)LONG_MIN;
+    val = TruncateDouble(val);
+    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? ULONG_MAX : (uint64_t)val;
+
+#else
     const double two63  = 2147483648.0 * 4294967296.0;
     uint64_t ret;
     if (val < two63)
@@ -32,6 +70,7 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
         ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
     }
     return ret;
+#endif // TARGET_X86 || TARGET_AMD64
 }
 
 #undef min

From b02a96c6548abc958dae482f01203fb6ecc161cf Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 23 Jun 2023 14:06:04 -0700
Subject: [PATCH 31/40] resolving regression issues

---
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index cf554efd56308..4909d6624c71c 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -53,9 +53,9 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
 {
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
-    const double uint64_max_plus_1 = -2.0 * (double)LONG_MIN;
+    const double uint64_max_plus_1 = -2.0 * (double)0xFFFFFFFF;
     val = TruncateDouble(val);
-    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? ULONG_MAX : (uint64_t)val;
+    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 0xFFFFFFFF : (uint64_t)val;
 
 #else
     const double two63  = 2147483648.0 * 4294967296.0;

From fc0d127815f2240230e9b60cf4a5270d4a468ecc Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 13:26:13 -0700
Subject: [PATCH 32/40] Rolling back changes for double/float -> ulong

---
 src/coreclr/jit/codegenxarch.cpp              |  8 ++--
 src/coreclr/jit/emit.h                        |  5 ---
 src/coreclr/jit/instr.cpp                     |  7 +---
 src/coreclr/jit/lowerxarch.cpp                |  6 +--
 src/coreclr/jit/morph.cpp                     |  4 --
 src/coreclr/vm/jithelpers.cpp                 | 40 -------------------
 .../src/System/Double.cs                      |  2 +-
 .../System.Private.CoreLib/src/System/Half.cs |  2 +-
 .../System/Runtime/InteropServices/NFloat.cs  |  2 +-
 .../src/System/Single.cs                      |  2 +-
 .../out_of_range_fp_to_int_conversions.cpp    | 14 ++++++-
 .../out_of_range_fp_to_int_conversions.cs     | 16 +++++++-
 12 files changed, 38 insertions(+), 70 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 7789c25074863..8635d699c0fd9 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -7461,10 +7461,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
     // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase, unless we have AVX512F
-    // accelerated conversions.
-    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
-           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+    // into a helper call by either front-end or lowering phase.
+    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
@@ -7477,7 +7475,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
     genConsumeOperands(treeNode->AsOp());
-    instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
+    instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
     GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
     genProduceReg(treeNode);
 }
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index e2b3b350963b5..9cab8e6fcea2f 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3891,11 +3891,6 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
             return EA_32BYTE;
         }
 
-        case INS_vcvttss2usi64:
-        {
-            return EA_4BYTE;
-        }
-
         case INS_movddup:
         {
             if (defaultSize == 64)
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 43cd4ce2ddc17..c7f57bbccc719 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2282,8 +2282,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
 {
     // AVX: For now we support only conversion from Int/Long -> float
     // AVX512: Supports following conversions
-    //   srcType = float/double                    castToType = ulong
-    //   srcType = ulong                           castToType = double
+    //   srcType = ulong                           castToType = double/float
 
     switch (from)
     {
@@ -2332,8 +2331,6 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
                     return ins_Move_Extend(TYP_FLOAT, false);
                 case TYP_DOUBLE:
                     return INS_cvtss2sd;
-                case TYP_ULONG:
-                    return INS_vcvttss2usi64;
                 default:
                     unreached();
             }
@@ -2346,8 +2343,6 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
                     return INS_cvttsd2si;
                 case TYP_LONG:
                     return INS_cvttsd2si;
-                case TYP_ULONG:
-                    return INS_vcvttsd2usi;
                 case TYP_FLOAT:
                     return INS_cvtsd2ss;
                 case TYP_DOUBLE:
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 94632d1411e79..79621de60c68b 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree)
     //   srcType = float/double                    castToType = * and overflow detecting cast
     //       Reason: must be converted to a helper call
     //   srcType = float/double,                   castToType = ulong
-    //       Reason: must be converted to a helper call unless we have AVX512F
+    //       Reason: must be converted to a helper call
     //   srcType = uint                            castToType = float/double
     //       Reason: uint -> float/double = uint -> long -> float/double
     //   srcType = ulong                           castToType = float
     //       Reason: ulong -> float = ulong -> double -> float
     if (varTypeIsFloating(srcType))
     {
-        assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
-        assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        assert(!tree->gtOverflow());
+        assert(castToType != TYP_ULONG);
     }
     else if (srcType == TYP_UINT)
     {
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index dd63ecca7494d..1f224d954a126 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -390,10 +390,6 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #endif // !TARGET_AMD64
 
                     case TYP_ULONG:
-#ifdef TARGET_AMD64
-                        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
-                            return nullptr;
-#endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                     default:
                         unreached();
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 3e9466efa42ed..b7ae2b858aadd 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -572,38 +572,6 @@ FORCEINLINE INT64 FastDbl2Lng(double val)
 #endif
 }
 
-//------------------------------------------------------------------------
-// TruncateDouble: helper function to truncate double 
-//                 numbers to nearest integer (round towards zero).
-//
-// Arguments:
-//    val  - double number to be truncated.
-//
-// Return Value:
-//    double: truncated number (rounded towards zero)
-// 
-double TruncateDouble(double val)
-{
-    LIMITED_METHOD_CONTRACT;
-    int64_t *dintVal = (int64_t *)&val;
-
-    uint64_t uintVal = (uint64_t)*dintVal;
-    int exponent = (int)((uintVal >> 52) & 0x7FF);
-    if (exponent < 1023)
-    {
-        uintVal = uintVal & 0x8000000000000000ull;
-    }
-    else if (exponent < 1075)
-    {
-        uintVal = uintVal &  (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023)));
-    }
-    int64_t intVal = (int64_t)uintVal;
-    double *doubleVal = (double *)&intVal;
-    double retVal = *doubleVal;
-
-    return retVal;
-}
-
 /*********************************************************************/
 HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val)
 {
@@ -621,13 +589,6 @@ HCIMPLEND
 HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-
-    const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN;
-    val = TruncateDouble(val);
-    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val;
-
-#else
     const double two63  = 2147483648.0 * 4294967296.0;
     UINT64 ret;
     if (val < two63) {
@@ -638,7 +599,6 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
         ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
     }
     return ret;
-#endif // TARGET_X86 || TARGET_AMD64
 }
 HCIMPLEND
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs
index c459a648b44a6..aaa637ae02a6f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Double.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs
@@ -1400,7 +1400,7 @@ private static bool TryConvertTo<TOther>(double value, [MaybeNullWhen(false)] ou
             {
 #if TARGET_64BIT
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #else
diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs
index 07a7027359487..6415acc9c798e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Half.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs
@@ -1883,7 +1883,7 @@ private static bool TryConvertTo<TOther>(Half value, [MaybeNullWhen(false)] out
             else if (typeof(TOther) == typeof(nuint))
             {
                 nuint actualResult = (value == PositiveInfinity) ? nuint.MaxValue :
-                                     (value <= Zero || IsNaN(value)) ? nuint.MinValue : (nuint)value;
+                                     (value <= Zero) ? nuint.MinValue : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
             }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
index d8f35715ff0bf..e5645feb21ffa 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs
@@ -1754,7 +1754,7 @@ private static bool TryConvertTo<TOther>(NFloat value, [MaybeNullWhen(false)] ou
                 return true;
 #else
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs
index 3219e9b27d585..42d63de43279b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Single.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs
@@ -1380,7 +1380,7 @@ private static bool TryConvertTo<TOther>(float value, [MaybeNullWhen(false)] out
             {
 #if TARGET_64BIT
                 nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) :
-                                     (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
+                                     (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value;
                 result = (TOther)(object)actualResult;
                 return true;
 #else
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index 3890fcac11a3d..bffa2bf179f29 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -137,7 +137,6 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
         return ((x != x) || (x < INT64_MIN) || (x >= uint64_max_plus_1)) ? (uint64_t)INT64_MIN : (x < 0) ? (uint64_t)(int64_t)x : (uint64_t)x;
 
     case CONVERT_SENTINEL:
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
         return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x;
 
     case CONVERT_SATURATING:
@@ -154,7 +153,18 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
                 return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
             }
         }
-    
+
+    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
+        if (x < int64_max_plus_1)
+        {
+            return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x;
+        }
+        else
+        {
+            x -= int64_max_plus_1;
+            x = trunc(x);
+            return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
+        }    
     
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index e2be91c974fec..ef9a9877de4d0 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -183,7 +183,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= ullong_max_plus_1)) ? unchecked((ulong)long.MinValue): (x < 0) ? (ulong)(long)x: (ulong)x;
 
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                     return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)x;
 
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
@@ -200,6 +199,21 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                             return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
                         }
                     }
+
+                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
+
+                    if (x < two63)
+                    {
+                        return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x;
+                    }
+                    else
+                    {
+                        // (double)LLONG_MAX cannot be represented exactly as double
+                        const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1);
+                        x -= two63;
+                        x = Math.Truncate(x);
+                        return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
+                    }
                 
             }
 

From 9b56b8620afc4918e9fec27dab804e45ecb90281 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 14:30:07 -0700
Subject: [PATCH 33/40] Rolling back changes for double/float -> ulong

---
 src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 39 -------------------
 1 file changed, 39 deletions(-)

diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index 4909d6624c71c..9ad553ce15647 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -18,46 +18,8 @@ FORCEINLINE int64_t FastDbl2Lng(double val)
 #endif
 }
 
-//------------------------------------------------------------------------
-// TruncateDouble: helper function to truncate double 
-//                 numbers to nearest integer (round towards zero).
-//
-// Arguments:
-//    val  - double number to be truncated.
-//
-// Return Value:
-//    double: truncated number (rounded towards zero)
-// 
-double TruncateDouble(double val)
-{
-    int64_t *dintVal = (int64_t *)&val;
-
-    uint64_t uintVal = (uint64_t)*dintVal;
-    int exponent = (int)((uintVal >> 52) & 0x7FF);
-    if (exponent < 1023)
-    {
-        uintVal = uintVal & 0x8000000000000000ull;
-    }
-    else if (exponent < 1075)
-    {
-        uintVal = uintVal &  (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023)));
-    }
-    int64_t intVal = (int64_t)uintVal;
-    double *doubleVal = (double *)&intVal;
-    double retVal = *doubleVal;
-
-    return retVal;
-}
-
 EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
 {
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-
-    const double uint64_max_plus_1 = -2.0 * (double)0xFFFFFFFF;
-    val = TruncateDouble(val);
-    return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 0xFFFFFFFF : (uint64_t)val;
-
-#else
     const double two63  = 2147483648.0 * 4294967296.0;
     uint64_t ret;
     if (val < two63)
@@ -70,7 +32,6 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
         ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
     }
     return ret;
-#endif // TARGET_X86 || TARGET_AMD64
 }
 
 #undef min

From 930c4731e27b957110634015f695a50ed9e9c654 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 15:21:17 -0700
Subject: [PATCH 34/40] Reverting out_of_range_fp_conversion to original
 version

---
 .../Directed/Convert/out_of_range_fp_to_int_conversions.cpp    | 1 -
 .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index bffa2bf179f29..7a7df4e173355 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -165,7 +165,6 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
             x = trunc(x);
             return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
         }    
-    
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index ef9a9877de4d0..ca61c9d26af6d 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -213,8 +213,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                         x -= two63;
                         x = Math.Truncate(x);
                         return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
-                    }
-                
+                    }                
             }
 
             return 0;

From b2ae11062e2473bdcfc7ac86ab200811c045b3ce Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 15:24:22 -0700
Subject: [PATCH 35/40] Reverting out_of_range_fp_conversion to original
 version

---
 .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp | 2 +-
 .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index 7a7df4e173355..eaf7f2fa1a9da 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -164,7 +164,7 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
             x -= int64_max_plus_1;
             x = trunc(x);
             return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
-        }    
+        }
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index ca61c9d26af6d..5b78783c09e4c 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -213,7 +213,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
                         x -= two63;
                         x = Math.Truncate(x);
                         return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
-                    }                
+                    }
             }
 
             return 0;

From 0439e289a3967c5b9d7a24ed5b26bc548cc581a5 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 15:26:26 -0700
Subject: [PATCH 36/40] Reverting jithelpers.cpp to original version

---
 src/coreclr/vm/jithelpers.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index b7ae2b858aadd..1b697efb20b87 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -589,6 +589,7 @@ HCIMPLEND
 HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
+    
     const double two63  = 2147483648.0 * 4294967296.0;
     UINT64 ret;
     if (val < two63) {

From 2166ae53f847314b6ba06991497c116529316bfc Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 30 Jun 2023 15:26:51 -0700
Subject: [PATCH 37/40] Reverting jithelpers.cpp to original version

---
 src/coreclr/vm/jithelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 1b697efb20b87..d4ce2c9aa69ac 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -589,7 +589,7 @@ HCIMPLEND
 HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
-    
+
     const double two63  = 2147483648.0 * 4294967296.0;
     UINT64 ret;
     if (val < two63) {

From e2a6029225bf4a575ceafb527cbda5e47cc62dd5 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Wed, 5 Jul 2023 10:18:14 -0700
Subject: [PATCH 38/40] Changing comments, reverting asserts, skipping to
 change node for cast

---
 src/coreclr/jit/importer.cpp   | 12 ------------
 src/coreclr/jit/instr.cpp      |  3 ++-
 src/coreclr/jit/lowerxarch.cpp |  4 ++--
 src/coreclr/jit/morph.cpp      |  2 +-
 4 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 9c38a69d6854d..de914ea0bdfdc 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -7883,18 +7883,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                                || (impStackTop().val->TypeGet() == TYP_BYREF)
 #endif
                         ;
-#ifdef TARGET_AMD64
-                    // If AVX512 is present and we are not checking for overflow, we do not need
-                    // a large node. In this case, we will not fallback to a helper function but
-                    // will use the intrinsic instead. This is done for all long/ulong to floating
-                    // point conversions. Hence setting the callNode to false to
-                    // avoid generating a large node.
-                    if (callNode && !ovfl && varTypeIsLong(impStackTop().val) &&
-                        compOpportunisticallyDependsOn(InstructionSet_AVX512F))
-                    {
-                        callNode = false;
-                    }
-#endif // TARGET_AMD64
                 }
                 else
                 {
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index c7f57bbccc719..132d09b518b13 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2280,7 +2280,8 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
 //
 instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
 {
-    // AVX: For now we support only conversion from Int/Long -> float
+    // AVX: Supports following conversions
+    //   srcType = int16/int64                     castToType = float
     // AVX512: Supports following conversions
     //   srcType = ulong                           castToType = double/float
 
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 79621de60c68b..80b7c94b9dae3 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree)
     //       Reason: ulong -> float = ulong -> double -> float
     if (varTypeIsFloating(srcType))
     {
-        assert(!tree->gtOverflow());
-        assert(castToType != TYP_ULONG);
+        noway_assert(!tree->gtOverflow());
+        noway_assert(castToType != TYP_ULONG);
     }
     else if (srcType == TYP_UINT)
     {
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 1f224d954a126..6ea83ce9b591f 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -301,7 +301,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
     // have the 2 GT_CAST nodes in the tree and we are combining them below.
     if (oper->OperIs(GT_CAST))
     {
-        GenTreeCast* innerCast = static_cast<GenTreeCast*>(oper);
+        GenTreeCast* innerCast = oper->AsCast();
 
         if (innerCast->IsUnsigned())
         {

From 715fc7e357dce0f6c994fad6ae512d25188f0d22 Mon Sep 17 00:00:00 2001
From: Khushal Modi <kcmodi@asu.edu>
Date: Fri, 14 Jul 2023 13:43:23 -0700
Subject: [PATCH 39/40] addressing review comments

---
 src/coreclr/jit/morph.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 6ea83ce9b591f..3d8f13a007519 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -312,13 +312,13 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
             innerSrcType           = varTypeToUnsigned(innerSrcType);
 
             // Check if we are going from ulong->double->float
-            if (innerSrcType == TYP_ULONG && innerDstType == TYP_DOUBLE && dstType == TYP_FLOAT)
+            if ((innerSrcType == TYP_ULONG) && (innerDstType == TYP_DOUBLE) && (dstType == TYP_FLOAT))
             {
                 if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
                 {
                     // One optimized (combined) cast here
-                    tree         = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT);
-                    tree->gtType = TYP_FLOAT;
+                    tree         = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT);
+                    //tree->gtType = TYP_FLOAT;
                     return fgMorphTree(tree);
                 }
             }

From dc6e41ac1a2f2e486aa0c702329b807d3a08c75d Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 14 Jul 2023 19:11:47 -0700
Subject: [PATCH 40/40] Update src/coreclr/jit/morph.cpp

---
 src/coreclr/jit/morph.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 3d8f13a007519..22bddbd9a5e83 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -317,8 +317,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                 if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
                 {
                     // One optimized (combined) cast here
-                    tree         = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT);
-                    //tree->gtType = TYP_FLOAT;
+                    tree = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT);
                     return fgMorphTree(tree);
                 }
             }