From 7d764be028c51efc10dafd00013cda1884c212ec Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 9 May 2023 16:22:00 -0700 Subject: [PATCH 01/40] fixing the JITDbl2Ulng helper function. The new AVX512 instruction vcvtsd2usi uses ulong.max_value to show FPE for negative, NAN and ulong_max + 1 values. --- src/coreclr/vm/jithelpers.cpp | 5 +++++ .../out_of_range_fp_to_int_conversions.cpp | 15 ++++----------- .../out_of_range_fp_to_int_conversions.cs | 16 ++-------------- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index d4ce2c9aa69ac..2949df0d53284 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -589,7 +589,11 @@ HCIMPLEND HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; +#if defined(TARGET_X86) || defined(TARGET_AMD64) + const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; +#else const double two63 = 2147483648.0 * 4294967296.0; UINT64 ret; if (val < two63) { @@ -600,6 +604,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; +#endif } HCIMPLEND diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index eaf7f2fa1a9da..db690e1160f80 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -124,6 +124,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver if (t == CONVERT_NATIVECOMPILERBEHAVIOR) return (uint64_t)x; + double input_val = x; x = trunc(x); // truncate (round toward zero) // (double)UINT64_MAX cannot be represented exactly as double @@ -153,18 +154,10 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, 
FPtoIntegerConver return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - if (x < int64_max_plus_1) - { - return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x; - } - else - { - x -= int64_max_plus_1; - x = trunc(x); - return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000); - } + return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val; + case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 5b78783c09e4c..49197e7965feb 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -171,6 +171,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) return (ulong)x; + double input_val = x; x = Truncate(x); // truncate (round toward zero) // (double)ULLONG_MAX cannot be represented exactly as double @@ -199,21 +200,8 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - - if (x < two63) - { - return (x < long.MinValue) ? 
unchecked((ulong)long.MinValue) : (ulong)(long)x; - } - else - { - // (double)LLONG_MAX cannot be represented exactly as double - const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1); - x -= two63; - x = Math.Truncate(x); - return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000); - } + return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val; } return 0; From f50408b6b9f6bef3b64b2f54922ca377f44bb1ab Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 10 May 2023 03:20:29 -0700 Subject: [PATCH 02/40] Making changes to the library test case expected output based on the architecture. This is because we have changed the JITDbl2Ulng helper function to mimic the new IEEE compliant AVX512 instruction vcvtsd2usi. In the process, we needed to update the library test case because the default Floating Point Error (FPE) value for the new instruction is different from the default MSVC FPE value i.e. 0. 
--- .../tests/System/UIntPtrTests.GenericMath.cs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs index 2e752a91af21f..117c87db6ce9e 100644 --- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs +++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs @@ -12,6 +12,7 @@ public class UIntPtrTests_GenericMath // // IAdditionOperators // + public static Architecture arch = RuntimeInformation.ProcessArchitecture; [Fact] public static void op_AdditionTest() @@ -2223,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); } [Fact] @@ -2244,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); } [Fact] @@ -2351,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == 
Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); } [Fact] @@ -2396,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); } [Fact] @@ -2535,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); } [Fact] @@ -2556,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); } [Fact] @@ -2685,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); } [Fact] 
@@ -2741,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); } [Fact] From f018095ace63b13fda4fe91f24d3d7913c10fb85 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 12 May 2023 13:32:23 -0700 Subject: [PATCH 03/40] Fixing the JITDbl2Ulng helper function. Also making sure that we are not changing the library test case but the API to make sure NaN cases are handled. --- src/coreclr/scripts/jitformat.py | 12 +++++++----- src/coreclr/vm/jithelpers.cpp | 3 ++- .../System.Private.CoreLib/src/System/Double.cs | 2 +- .../System.Private.CoreLib/src/System/Half.cs | 2 +- .../src/System/Runtime/InteropServices/NFloat.cs | 2 +- .../System.Private.CoreLib/src/System/Single.cs | 2 +- .../tests/System/UIntPtrTests.GenericMath.cs | 16 ++++++++-------- .../out_of_range_fp_to_int_conversions.cpp | 6 ++---- .../out_of_range_fp_to_int_conversions.cs | 5 ++--- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index 51a096c59cd3c..497a5a12290e3 100644 --- a/src/coreclr/scripts/jitformat.py +++ b/src/coreclr/scripts/jitformat.py @@ -21,6 +21,7 @@ import tarfile import tempfile import zipfile +import time class ChangeDir: def __init__(self, dir): @@ -81,7 +82,7 @@ def main(argv): args, unknown = parser.parse_known_args(argv) if unknown: - logging.warning('Ignoring argument(s): {}'.format(','.join(unknown))) + logging.warn('Ignoring argument(s): {}'.format(','.join(unknown))) if args.coreclr is None: logging.error('Specify --coreclr') @@ -140,10 +141,11 @@ def main(argv): bootstrapPath = 
os.path.join(temp_location, bootstrapFilename) assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0 - - if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): - logging.error("Did not download bootstrap!") - return -1 + print(bootstrapPath) + time.sleep(60) + # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): + # logging.error("Did not download bootstrap!") + # return -1 if platform == 'windows': # Need to ensure we have Windows line endings on the downloaded script file, diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 2949df0d53284..d1e9193e252a6 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -590,8 +590,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; #if defined(TARGET_X86) || defined(TARGET_AMD64) + const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; + return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; #else const double two63 = 2147483648.0 * 4294967296.0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs index aaa637ae02a6f..c459a648b44a6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Double.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs @@ -1400,7 +1400,7 @@ private static bool TryConvertTo(double value, [MaybeNullWhen(false)] ou { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? 
unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs index 6415acc9c798e..07a7027359487 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Half.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs @@ -1883,7 +1883,7 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out else if (typeof(TOther) == typeof(nuint)) { nuint actualResult = (value == PositiveInfinity) ? nuint.MaxValue : - (value <= Zero) ? nuint.MinValue : (nuint)value; + (value <= Zero || IsNaN(value)) ? nuint.MinValue : (nuint)value; result = (TOther)(object)actualResult; return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs index e5645feb21ffa..d8f35715ff0bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs @@ -1754,7 +1754,7 @@ private static bool TryConvertTo(NFloat value, [MaybeNullWhen(false)] ou return true; #else nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? 
unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs index 42d63de43279b..3219e9b27d585 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Single.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs @@ -1380,7 +1380,7 @@ private static bool TryConvertTo(float value, [MaybeNullWhen(false)] out { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs index 117c87db6ce9e..414788a4c4742 100644 --- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs +++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs @@ -2224,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); } [Fact] @@ -2245,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == 
Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); } [Fact] @@ -2352,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); } [Fact] @@ -2397,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); } [Fact] @@ -2536,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); } [Fact] @@ -2557,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); + 
Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); } [Fact] @@ -2686,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); } [Fact] @@ -2742,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); } [Fact] diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index db690e1160f80..3890fcac11a3d 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -124,7 +124,6 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver if (t == CONVERT_NATIVECOMPILERBEHAVIOR) return (uint64_t)x; - double input_val = x; x = trunc(x); // truncate (round toward zero) // (double)UINT64_MAX cannot be represented exactly as double @@ -138,6 +137,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver return ((x != x) || (x < INT64_MIN) || (x >= uint64_max_plus_1)) ? (uint64_t)INT64_MIN : (x < 0) ? 
(uint64_t)(int64_t)x : (uint64_t)x; case CONVERT_SENTINEL: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x; case CONVERT_SATURATING: @@ -155,9 +155,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver } } - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val; - + case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 49197e7965feb..e2be91c974fec 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -171,7 +171,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) return (ulong)x; - double input_val = x; x = Truncate(x); // truncate (round toward zero) // (double)ULLONG_MAX cannot be represented exactly as double @@ -184,6 +183,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (Double.IsNaN(x) || (x < long.MinValue) || (x >= ullong_max_plus_1)) ? unchecked((ulong)long.MinValue): (x < 0) ? (ulong)(long)x: (ulong)x; case FPtoIntegerConversionType.CONVERT_SENTINEL: + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? 
ulong.MaxValue : (ulong)x; case FPtoIntegerConversionType.CONVERT_SATURATING: @@ -200,8 +200,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val; + } return 0; From ffe97cd63fb098f8a38cf3de9218fe0feddc6303 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 12 May 2023 13:33:26 -0700 Subject: [PATCH 04/40] reverting jitformat --- src/coreclr/scripts/jitformat.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index 497a5a12290e3..ad63529fa7580 100644 --- a/src/coreclr/scripts/jitformat.py +++ b/src/coreclr/scripts/jitformat.py @@ -21,7 +21,6 @@ import tarfile import tempfile import zipfile -import time class ChangeDir: def __init__(self, dir): @@ -141,11 +140,10 @@ def main(argv): bootstrapPath = os.path.join(temp_location, bootstrapFilename) assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0 - print(bootstrapPath) - time.sleep(60) - # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): - # logging.error("Did not download bootstrap!") - # return -1 + + if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): + logging.error("Did not download bootstrap!") + return -1 if platform == 'windows': # Need to ensure we have Windows line endings on the downloaded script file, From a8ee861013973bf7141233602b677740550bae70 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 15 May 2023 16:31:05 -0700 Subject: [PATCH 05/40] Adding a truncate function to the Dbl2Ulng helper to make sure we avoid handling edge cases (-1,0) separately inside the helper. 
--- src/coreclr/vm/jithelpers.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index d1e9193e252a6..1d9dbb77cc2de 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -572,6 +572,30 @@ FORCEINLINE INT64 FastDbl2Lng(double val) #endif } +/*********************************************************************/ +// helper function to truncate double numbers to nearest integer (round towards zero) +double TrucateDouble(double val) +{ + FCALL_CONTRACT; + int64_t *dintVal = (int64_t *)&val; + + uint64_t uintVal = (uint64_t)*dintVal; + int exponent = (int)((uintVal >> 52) & 0x7FF); + if (exponent < 1023) + { + uintVal = uintVal & 0x8000000000000000ull; + } + else if (exponent < 1075) + { + uintVal = uintVal & (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023))); + } + int64_t intVal = (int64_t)uintVal; + double *doubleVal = (double *)&intVal; + double retVal = *doubleVal; + + return retVal; +} + /*********************************************************************/ HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val) { @@ -592,7 +616,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) #if defined(TARGET_X86) || defined(TARGET_AMD64) const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; + val = TrucateDouble(val); + //return ((val != val) || ((val < 0) && (val + 1 < 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 
UINT64_MAX : (UINT64)val; #else const double two63 = 2147483648.0 * 4294967296.0; From bbd8a8b6e3d0564996b310be63b216dbbf0deb3e Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 00:24:42 -0700 Subject: [PATCH 06/40] Adding code to handle vectorized conversion for float/double to/from ulong/uint --- src/coreclr/jit/codegenxarch.cpp | 20 +++++++++++++++++--- src/coreclr/jit/emitxarch.cpp | 12 ++++++++++-- src/coreclr/jit/instr.cpp | 16 ++++++++++++++++ src/coreclr/jit/lowerxarch.cpp | 8 ++++---- src/coreclr/jit/morph.cpp | 4 ++++ 5 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 75d8e5432c4ae..c9c5677e1e796 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7338,6 +7338,18 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) noway_assert(srcType != TYP_UINT); noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); + if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + { + if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT)) + { + genConsumeOperands(treeNode->AsOp()); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); + GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); + genProduceReg(treeNode); + return; + } + } + // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, @@ -7449,8 +7461,10 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase. 
- noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); + // into a helper call by either front-end or lowering phase, unless we have AVX512F + // accelerated conversions. + noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. @@ -7463,7 +7477,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // Note that we need to specify dstType here so that it will determine // the size of destination integer register and also the rex.w prefix. genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType)); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); genProduceReg(treeNode); } diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 13307006a6db3..088d8ba03a49f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -18595,15 +18595,23 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvtsi2sd64: case INS_cvtsi2ss64: case INS_vcvtsd2usi: - case INS_vcvttsd2usi: case INS_vcvtusi2sd32: - case INS_vcvtusi2sd64: case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; + case INS_vcvttsd2usi: + result.insLatency += PERFSCORE_LATENCY_6C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_vcvtusi2sd64: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_5C; + break; + case INS_cvttss2si: case INS_cvtss2si: case INS_vcvtss2usi: diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 93c4e601bb781..82f8166d4e81d 
100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2281,6 +2281,9 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type) instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) { // AVX: For now we support only conversion from Int/Long -> float + // AVX512: Supports following conversions + // srcType = float/double castToType = ulong + // srcType = ulong castToType = double switch (from) { @@ -2329,6 +2332,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return ins_Move_Extend(TYP_FLOAT, false); case TYP_DOUBLE: return INS_cvtss2sd; + case TYP_ULONG: + return INS_vcvttss2usi; default: unreached(); } @@ -2341,6 +2346,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return INS_cvttsd2si; case TYP_LONG: return INS_cvttsd2si; + case TYP_ULONG: + return INS_vcvttsd2usi; case TYP_FLOAT: return INS_cvtsd2ss; case TYP_DOUBLE: @@ -2350,6 +2357,15 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) } break; + case TYP_ULONG: + switch (to) + { + case TYP_DOUBLE: + return INS_vcvtusi2sd64; + default: + unreached(); + } + default: unreached(); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index aa4258d71ba77..bba730cc5faee 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree) // srcType = float/double castToType = * and overflow detecting cast // Reason: must be converted to a helper call // srcType = float/double, castToType = ulong - // Reason: must be converted to a helper call + // Reason: must be converted to a helper call unless we have AVX512F // srcType = uint castToType = float/double // Reason: uint -> float/double = uint -> long -> float/double // srcType = ulong castToType = float // Reason: ulong -> float = ulong -> double -> float - if (varTypeIsFloating(srcType)) + if (srcType == 
TYP_FLOAT) { - noway_assert(!tree->gtOverflow()); - noway_assert(castToType != TYP_ULONG); + noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8e4c6612b41a1..400ca8e48a34c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -357,6 +357,10 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #endif // !TARGET_AMD64 case TYP_ULONG: +#ifdef TARGET_AMD64 + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + return nullptr; +#endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); From a21a0775a257bc693b0cebf8bb6591d5344260f5 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 11:45:15 -0700 Subject: [PATCH 07/40] reverting changes for float to ulong --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 400ca8e48a34c..ead3703537ca4 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -358,7 +358,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) case TYP_ULONG: #ifdef TARGET_AMD64 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT) return nullptr; #endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); From 1e3415ab924b641373e03413f6b15a9a389a9e27 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 14:28:24 -0700 Subject: [PATCH 08/40] enabling float to ulong conversion --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ead3703537ca4..400ca8e48a34c 100644 --- 
a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -358,7 +358,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) case TYP_ULONG: #ifdef TARGET_AMD64 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) return nullptr; #endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); From c788c67f0d97526bddf4effa617ee30eb65cee1a Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 17 May 2023 00:35:51 -0700 Subject: [PATCH 09/40] Making change to set w1 bit for evex --- src/coreclr/jit/instrsxarch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 161df4485e0d9..382896ffe61ab 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -633,7 +633,7 @@ INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_ INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs 
INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD From fbb2a90f8b87aed086005b68fd4fa05bf5e4efae Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 00:25:58 -0700 Subject: [PATCH 10/40] merging with main. Picking up hwintrinsiclistxarh from main trying to return EA_4BYTE for INS_vcvttss2usi to make sure that we read dword and not qword for float to ulong --- src/coreclr/jit/emit.h | 6 ++++++ src/coreclr/jit/emitxarch.cpp | 27 +++++++++++++-------------- src/coreclr/jit/instr.cpp | 2 +- src/coreclr/jit/instrsxarch.h | 3 ++- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 9cab8e6fcea2f..5bbaa389df189 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3891,6 +3891,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const return EA_32BYTE; } + case INS_vcvttss2usi64: + case INS_vcvttss2usi32: + { + return EA_4BYTE; + } + case INS_movddup: { if (defaultSize == 64) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 088d8ba03a49f..ef6a90c7113fa 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1399,17 +1399,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: - { - if (attr == EA_8BYTE) - { - return true; - } - - // TODO-Cleanup: This should really only ever be EA_4BYTE - assert((attr == EA_4BYTE) || (attr == EA_16BYTE)); - return false; - } case INS_vbroadcastsd: case INS_vpbroadcastq: @@ -2623,7 +2612,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_vcvtsd2usi: 
case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: + case INS_vcvttss2usi32: + case INS_vcvttss2usi64: { // These SSE instructions write to a general purpose integer register. return false; @@ -11435,7 +11425,7 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: + //case INS_vcvttss2usi: { printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); break; @@ -18615,10 +18605,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvttss2si: case INS_cvtss2si: case INS_vcvtss2usi: - case INS_vcvttss2usi: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; + + case INS_vcvttss2usi32: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_7C; + break; + + case INS_vcvttss2usi64: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_8C; + break; case INS_cvtss2sd: result.insThroughput = PERFSCORE_THROUGHPUT_1C; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 82f8166d4e81d..cdeca11b327ee 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2333,7 +2333,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) case TYP_DOUBLE: return INS_cvtss2sd; case TYP_ULONG: - return INS_vcvttss2usi; + return INS_vcvttss2usi64; default: unreached(); } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 382896ffe61ab..0f16b1c87297f 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -637,7 +637,8 @@ INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_ INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs 
INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD -INST3(vcvttss2usi, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double From 9fece01dd61191f06df0c91f42910ba643551b9f Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 01:16:12 -0700 Subject: [PATCH 11/40] jit format --- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitxarch.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5bbaa389df189..5fbc31bb116c4 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3896,7 +3896,7 @@ emitAttr 
emitter::emitGetMemOpSize(instrDesc* id) const { return EA_4BYTE; } - + case INS_movddup: { if (defaultSize == 64) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ef6a90c7113fa..bcebaa6e89bed 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -11425,11 +11425,11 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - //case INS_vcvttss2usi: - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); - break; - } + // case INS_vcvttss2usi: + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); + break; + } #ifdef TARGET_AMD64 case INS_movsxd: @@ -18608,12 +18608,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; - + case INS_vcvttss2usi32: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; - + case INS_vcvttss2usi64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_8C; From b40cd8ed897600b1cf29e2e1292f267ffdd0562d Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 14:00:03 -0700 Subject: [PATCH 12/40] Splitting vcvttss2usi to vcvttss2usi32 and vcvttss2usi64. 
Also adding a special handling for vcvttss2usi64 to make sure we read only dword instead of qword for float to ulong conversion --- src/coreclr/jit/emit.h | 7 +++++-- src/coreclr/jit/emitxarch.cpp | 26 +++++++++++++++++++++----- src/coreclr/jit/instrsxarch.h | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5fbc31bb116c4..2b08bb8c03d08 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3892,9 +3892,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const } case INS_vcvttss2usi64: - case INS_vcvttss2usi32: { - return EA_4BYTE; + if (defaultSize == 8) + { + return EA_4BYTE; + } + return defaultSize; } case INS_movddup: diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index bcebaa6e89bed..937ea7a7fcc55 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1399,6 +1399,16 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: + { + if (attr == EA_8BYTE) + { + return true; + } + + // TODO-Cleanup: This should really only ever be EA_4BYTE + assert((attr == EA_4BYTE) || (attr == EA_16BYTE)); + return false; + } case INS_vbroadcastsd: case INS_vpbroadcastq: @@ -11425,11 +11435,17 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - // case INS_vcvttss2usi: - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); - break; - } + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); + break; + } + + case INS_vcvttss2usi32: + case INS_vcvttss2usi64: + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE)); + break; + } #ifdef TARGET_AMD64 case INS_movsxd: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 0f16b1c87297f..add7a79abbde6 100644 --- 
a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -633,7 +633,7 @@ INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_ INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD From 710026eab3381ba459b358ab2fae5cb638b7e159 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 16:42:40 -0700 Subject: [PATCH 13/40] undoing jitformat changes due to merge error --- src/coreclr/scripts/jitformat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index ad63529fa7580..51a096c59cd3c 100644 --- a/src/coreclr/scripts/jitformat.py 
+++ b/src/coreclr/scripts/jitformat.py @@ -81,7 +81,7 @@ def main(argv): args, unknown = parser.parse_known_args(argv) if unknown: - logging.warn('Ignoring argument(s): {}'.format(','.join(unknown))) + logging.warning('Ignoring argument(s): {}'.format(','.join(unknown))) if args.coreclr is None: logging.error('Specify --coreclr') From 75e6acfc43e6c0e38f0ed4c3440523fef847d3da Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 19 May 2023 15:03:03 -0700 Subject: [PATCH 14/40] removing unused code and correcting throughput and latency information for vcvttsd2usi, vcvttusi2sd32/64 --- src/coreclr/jit/emitxarch.cpp | 8 ++------ src/coreclr/vm/jithelpers.cpp | 1 - .../tests/System/UIntPtrTests.GenericMath.cs | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 937ea7a7fcc55..cc6816af87099 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -18601,19 +18601,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvtsi2sd64: case INS_cvtsi2ss64: case INS_vcvtsd2usi: - case INS_vcvtusi2sd32: case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency += PERFSCORE_LATENCY_7C; - break; - case INS_vcvttsd2usi: - result.insLatency += PERFSCORE_LATENCY_6C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_7C; break; case INS_vcvtusi2sd64: + case INS_vcvtusi2sd32: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 1d9dbb77cc2de..78db022d9ea03 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -617,7 +617,6 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; val = TrucateDouble(val); - //return ((val != val) || ((val < 0) && 
(val + 1 < 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; #else diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs index 414788a4c4742..2e752a91af21f 100644 --- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs +++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs @@ -12,7 +12,6 @@ public class UIntPtrTests_GenericMath // // IAdditionOperators // - public static Architecture arch = RuntimeInformation.ProcessArchitecture; [Fact] public static void op_AdditionTest() From e15be4b072f18cc4d8b9554e7bb1de0fb407dedb Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 19 May 2023 15:04:39 -0700 Subject: [PATCH 15/40] correcting throughput and latency for vcvttss2usi32 and placing it with other similar instructions --- src/coreclr/jit/emitxarch.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index cc6816af87099..9f9ffdc5a614e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -18604,6 +18604,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: case INS_vcvttsd2usi: + case INS_vcvttss2usi32: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; @@ -18621,11 +18622,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency += opSize == EA_8BYTE ? 
PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; - case INS_vcvttss2usi32: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency += PERFSCORE_LATENCY_7C; - break; - case INS_vcvttss2usi64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_8C; From 10e28769b60ceadf3500ed14299cf21b6247f209 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 19 May 2023 16:11:52 -0700 Subject: [PATCH 16/40] formatting --- src/coreclr/vm/jithelpers.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 78db022d9ea03..54e2cb7de63ca 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -572,9 +572,17 @@ FORCEINLINE INT64 FastDbl2Lng(double val) #endif } -/*********************************************************************/ -// helper function to truncate double numbers to nearest integer (round towards zero) -double TrucateDouble(double val) +//------------------------------------------------------------------------ +// TruncateDouble: helper function to truncate double +// numbers to nearest integer (round towards zero). +// +// Arguments: +// val - double number to be truncated. +// +// Return Value: +// truncated number (rounded towards zero) +// +double TruncateDouble(double val) { FCALL_CONTRACT; int64_t *dintVal = (int64_t *)&val; @@ -616,7 +624,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) #if defined(TARGET_X86) || defined(TARGET_AMD64) const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - val = TrucateDouble(val); + val = TruncateDouble(val); return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 
UINT64_MAX : (UINT64)val; #else From 9463173664bb823cc8f130ba92f0dda79efbc993 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 19 May 2023 16:13:32 -0700 Subject: [PATCH 17/40] formatting --- src/coreclr/vm/jithelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 54e2cb7de63ca..c2aeefc0e93dc 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -638,7 +638,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; -#endif +#endif // TARGET_X86 || TARGET_AMD64 } HCIMPLEND From 4f7bb670ed06916a279c82ee0d4f39aec737591b Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Sun, 21 May 2023 22:02:05 -0700 Subject: [PATCH 18/40] updating comments --- src/coreclr/vm/jithelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index c2aeefc0e93dc..476ca84d90ee0 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -580,7 +580,7 @@ FORCEINLINE INT64 FastDbl2Lng(double val) // val - double number to be truncated. // // Return Value: -// truncated number (rounded towards zero) +// double: truncated number (rounded towards zero) // double TruncateDouble(double val) { From a99725c6dc67dd035489359a0b71649939fb52b4 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 24 May 2023 11:41:05 -0700 Subject: [PATCH 19/40] updating code for github comments. 
Using compIsaSupportedDebugOnly for nowayasserts and also checking for float and doubel both in lowercast for overflow and conversion to ulong --- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/emit.h | 6 +----- src/coreclr/jit/lowerxarch.cpp | 6 +++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c9c5677e1e796..f958ed60b8064 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. 
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 2b08bb8c03d08..e2b3b350963b5 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3893,11 +3893,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const case INS_vcvttss2usi64: { - if (defaultSize == 8) - { - return EA_4BYTE; - } - return defaultSize; + return EA_4BYTE; } case INS_movddup: diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index bba730cc5faee..1508a4f5962df 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -800,10 +800,10 @@ void Lowering::LowerCast(GenTree* tree) // Reason: uint -> float/double = uint -> long -> float/double // srcType = ulong castToType = float // Reason: ulong -> float = ulong -> double -> float - if (srcType == TYP_FLOAT) + if (varTypeIsFloating(srcType)) { - noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); - noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + noway_assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + noway_assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { From 44390b22c64d36e8b3b14bb958d62a8ae3f2ca47 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 24 May 2023 13:07:53 -0700 Subject: [PATCH 20/40] reverting to original checks for ISA supported Debug only because they are not available in release mode --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- src/coreclr/jit/lowerxarch.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index f958ed60b8064..5ea1360ae3c7c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7349,7 +7349,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) return; } } - + // To convert int to a 
float/double, cvtsi2ss/sd SSE2 instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, @@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1508a4f5962df..d049ab74c52dd 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree) // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { - noway_assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - noway_assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { From 2f20ef35cc5e1ab78b3612f8fadb8f449bea46f4 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 24 May 2023 16:39:08 -0700 Subject: [PATCH 21/40] running jitformat --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 5ea1360ae3c7c..f958ed60b8064 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ 
-7349,7 +7349,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) return; } } - + // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, @@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. From b7dff8aaca98b5bea175a30674ddcefe6bd6d451 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 24 May 2023 20:55:58 -0700 Subject: [PATCH 22/40] running jitformat --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index f958ed60b8064..c9c5677e1e796 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7464,7 +7464,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. 
From 9622f78ba3b32ead44ffd5dd429e8fe01befc189 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 16 Jun 2023 20:34:35 -0700 Subject: [PATCH 23/40] combine the 2 nodes GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT) --- src/coreclr/jit/codegenxarch.cpp | 3 ++- src/coreclr/jit/importer.cpp | 10 +++++++++ src/coreclr/jit/instr.cpp | 2 ++ src/coreclr/jit/lowerxarch.cpp | 2 +- src/coreclr/jit/morph.cpp | 37 +++++++++++++++++++++++++++++++- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c9c5677e1e796..a289b49e9b5e3 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7336,7 +7336,8 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions // here since they should have been lowered appropriately. noway_assert(srcType != TYP_UINT); - noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); + noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index de914ea0bdfdc..4aea31a14a762 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -7883,6 +7883,16 @@ void Compiler::impImportBlockCode(BasicBlock* block) || (impStackTop().val->TypeGet() == TYP_BYREF) #endif ; +#ifdef TARGET_AMD64 + // If AVX512 is present and we are not checking for overflow, we do not need + // a large node. In this case, we will not fallback to a helper function but + // will use the intrinsic instead. Hence setting the callNode to false to + // avoid generating a large node. 
+ if (callNode && compOpportunisticallyDependsOn(InstructionSet_AVX512F) && !ovfl) + { + callNode = false; + } +#endif // TARGET_AMD64 } else { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index cdeca11b327ee..43cd4ce2ddc17 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2362,6 +2362,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) { case TYP_DOUBLE: return INS_vcvtusi2sd64; + case TYP_FLOAT: + return INS_vcvtusi2ss64; default: unreached(); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index d049ab74c52dd..4e9caf1783830 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree) } else if (srcType == TYP_ULONG) { - noway_assert(castToType != TYP_FLOAT); + noway_assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } // Case of src is a small type and dst is a floating point type. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 400ca8e48a34c..9822a7ea44fd6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -293,6 +293,41 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) var_types dstType = tree->CastToType(); unsigned dstSize = genTypeSize(dstType); +#if defined(TARGET_AMD64) + // If AVX512 is present, we have intrinsic available to convert + // ulong directly to float. Hence, we need to combine the 2 nodes + // GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single + // node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT). At this point, we already + // have the 2 GT_CAST nodes in the tree and we are combining them below. 
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + { + if (oper->OperIs(GT_CAST)) + { + GenTreeCast* innerCast = static_cast(oper); + + if (innerCast->IsUnsigned()) + { + GenTree* innerOper = innerCast->CastOp(); + var_types innerSrcType = genActualType(innerOper); + var_types innerDstType = innerCast->CastToType(); + unsigned innerDstSize = genTypeSize(innerDstType); + innerSrcType = varTypeToUnsigned(innerSrcType); + + if (innerSrcType == TYP_ULONG) + { + if (dstType == TYP_FLOAT && innerDstType == TYP_DOUBLE) + { + // One optimized cast here + tree = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT); + tree->gtType = TYP_FLOAT; + return fgMorphTree(tree); + } + } + } + } + } +#endif + // See if the cast has to be done in two steps. R -> I if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { @@ -453,7 +488,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { srcType = varTypeToUnsigned(srcType); - if (srcType == TYP_ULONG) + if (srcType == TYP_ULONG && !compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { if (dstType == TYP_FLOAT) { From d3b542f1a93b7d315fc91d16ac238a35ec4dfc8d Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Sun, 18 Jun 2023 16:27:56 -0700 Subject: [PATCH 24/40] merging with main and updating hwintrinsiclistxarch to take into consideration 32bit and 64 bit version of vcvttss2usi. 
--- src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index e1649b2159c55..a11d80e02f958 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -845,7 +845,7 @@ HARDWARE_INTRINSIC(AVX512F, CompareNotEqual, HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) @@ -1002,7 +1002,7 @@ HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F_X64, 
ConvertToUInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags From 8343e18b20213333adb210e24dc023c4bd854180 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Sun, 18 Jun 2023 20:06:51 -0700 Subject: [PATCH 25/40] Changing noway_assert to assert to make sure compOpportunisticallyDependsOn only runs in debug mode. --- src/coreclr/jit/codegenxarch.cpp | 8 ++++---- src/coreclr/jit/lowerxarch.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index a289b49e9b5e3..7d4463d653d3b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7336,8 +7336,8 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions // here since they should have been lowered appropriately. 
noway_assert(srcType != TYP_UINT); - noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { @@ -7464,8 +7464,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // We shouldn't be seeing uint64 here as it should have been converted // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. - noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. 
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4e9caf1783830..e10306c2a78dd 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree) // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { - noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); - noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { @@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree) } else if (srcType == TYP_ULONG) { - noway_assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } // Case of src is a small type and dst is a floating point type. From e4567633d1b269116a29d3e96cc2cdc21954286b Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Sun, 18 Jun 2023 20:25:12 -0700 Subject: [PATCH 26/40] running jitformat --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7d4463d653d3b..dc9a09b7d17a8 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7337,7 +7337,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // here since they should have been lowered appropriately. 
noway_assert(srcType != TYP_UINT); assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { From fdb28c6520e14fbd0db5a2b46cc5007183f21b45 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 20 Jun 2023 09:38:48 -0700 Subject: [PATCH 27/40] Changing compOpportunisticallyDependsOn to compIsaSupportedDebugOnly in asserts aka code review changes --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- src/coreclr/jit/lowerxarch.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index dc9a09b7d17a8..898c69ec4cf6a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7337,7 +7337,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // here since they should have been lowered appropriately. noway_assert(srcType != TYP_UINT); assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { @@ -7465,7 +7465,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // into a helper call by either front-end or lowering phase, unless we have AVX512F // accelerated conversions. assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. 
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e10306c2a78dd..94632d1411e79 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree) // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { - assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); - assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { @@ -811,7 +811,7 @@ void Lowering::LowerCast(GenTree* tree) } else if (srcType == TYP_ULONG) { - assert(castToType != TYP_FLOAT || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(castToType != TYP_FLOAT || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); } // Case of src is a small type and dst is a floating point type. From e9ff179f92548bf28a863c1eba466d3133fb2caf Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 22 Jun 2023 12:58:11 -0700 Subject: [PATCH 28/40] Making code review changes. Moving around the compOpportunisticallyDependsOn checks to make sure they are run only if we need AVX512. These checks being costly, moving them to the innermost checks in nested if checks. 
--- src/coreclr/jit/codegenxarch.cpp | 17 +++++++-------- src/coreclr/jit/importer.cpp | 6 ++++-- src/coreclr/jit/morph.cpp | 36 +++++++++++++++----------------- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 898c69ec4cf6a..7789c25074863 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7339,16 +7339,15 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) && + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { - if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT)) - { - genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); - GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); - genProduceReg(treeNode); - return; - } + assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + genConsumeOperands(treeNode->AsOp()); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); + GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); + genProduceReg(treeNode); + return; } // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4aea31a14a762..9c38a69d6854d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -7886,9 +7886,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_AMD64 // If AVX512 is present and we are not checking for overflow, we do not need // a large node. In this case, we will not fallback to a helper function but - // will use the intrinsic instead. 
Hence setting the callNode to false to + // will use the intrinsic instead. This is done for all long/ulong to floating + // point conversions. Hence setting the callNode to false to // avoid generating a large node. - if (callNode && compOpportunisticallyDependsOn(InstructionSet_AVX512F) && !ovfl) + if (callNode && !ovfl && varTypeIsLong(impStackTop().val) && + compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { callNode = false; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9822a7ea44fd6..dd63ecca7494d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -299,34 +299,32 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single // node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT). At this point, we already // have the 2 GT_CAST nodes in the tree and we are combining them below. - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (oper->OperIs(GT_CAST)) { - if (oper->OperIs(GT_CAST)) + GenTreeCast* innerCast = static_cast(oper); + + if (innerCast->IsUnsigned()) { - GenTreeCast* innerCast = static_cast(oper); + GenTree* innerOper = innerCast->CastOp(); + var_types innerSrcType = genActualType(innerOper); + var_types innerDstType = innerCast->CastToType(); + unsigned innerDstSize = genTypeSize(innerDstType); + innerSrcType = varTypeToUnsigned(innerSrcType); - if (innerCast->IsUnsigned()) + // Check if we are going from ulong->double->float + if (innerSrcType == TYP_ULONG && innerDstType == TYP_DOUBLE && dstType == TYP_FLOAT) { - GenTree* innerOper = innerCast->CastOp(); - var_types innerSrcType = genActualType(innerOper); - var_types innerDstType = innerCast->CastToType(); - unsigned innerDstSize = genTypeSize(innerDstType); - innerSrcType = varTypeToUnsigned(innerSrcType); - - if (innerSrcType == TYP_ULONG) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { - if (dstType == TYP_FLOAT && innerDstType == TYP_DOUBLE) - { - // 
One optimized cast here - tree = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT); - tree->gtType = TYP_FLOAT; - return fgMorphTree(tree); - } + // One optimized (combined) cast here + tree = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT); + tree->gtType = TYP_FLOAT; + return fgMorphTree(tree); } } } } -#endif +#endif // TARGET_AMD64 // See if the cast has to be done in two steps. R -> I if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) From db2a0cb242f12f59ad45c7fc8ad25727dea1c3aa Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 23 Jun 2023 10:49:48 -0700 Subject: [PATCH 29/40] FCALL_CONTRACT should only be used on FCalls themselves --- src/coreclr/vm/jithelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 476ca84d90ee0..3e9466efa42ed 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -584,7 +584,7 @@ FORCEINLINE INT64 FastDbl2Lng(double val) // double TruncateDouble(double val) { - FCALL_CONTRACT; + LIMITED_METHOD_CONTRACT; int64_t *dintVal = (int64_t *)&val; uint64_t uintVal = (uint64_t)*dintVal; From 167b563f07a19480f438e17c4767f522d8598342 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 23 Jun 2023 13:13:06 -0700 Subject: [PATCH 30/40] Making parallel changes to JITHelper in MathHelper for native AOT --- src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 9ad553ce15647..cf554efd56308 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -18,8 +18,46 @@ FORCEINLINE int64_t FastDbl2Lng(double val) #endif } +//------------------------------------------------------------------------ +// TruncateDouble: helper function to truncate double +// numbers to nearest integer (round towards zero). 
+// +// Arguments: +// val - double number to be truncated. +// +// Return Value: +// double: truncated number (rounded towards zero) +// +double TruncateDouble(double val) +{ + int64_t *dintVal = (int64_t *)&val; + + uint64_t uintVal = (uint64_t)*dintVal; + int exponent = (int)((uintVal >> 52) & 0x7FF); + if (exponent < 1023) + { + uintVal = uintVal & 0x8000000000000000ull; + } + else if (exponent < 1075) + { + uintVal = uintVal & (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023))); + } + int64_t intVal = (int64_t)uintVal; + double *doubleVal = (double *)&intVal; + double retVal = *doubleVal; + + return retVal; +} + EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) { +#if defined(TARGET_X86) || defined(TARGET_AMD64) + + const double uint64_max_plus_1 = -2.0 * (double)LONG_MIN; + val = TruncateDouble(val); + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? ULONG_MAX : (uint64_t)val; + +#else const double two63 = 2147483648.0 * 4294967296.0; uint64_t ret; if (val < two63) @@ -32,6 +70,7 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; +#endif // TARGET_X86 || TARGET_AMD64 } #undef min From b02a96c6548abc958dae482f01203fb6ecc161cf Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 23 Jun 2023 14:06:04 -0700 Subject: [PATCH 31/40] resolving regression issues --- src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index cf554efd56308..4909d6624c71c 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -53,9 +53,9 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) { #if defined(TARGET_X86) || defined(TARGET_AMD64) - const double uint64_max_plus_1 = -2.0 * (double)LONG_MIN; + const 
double uint64_max_plus_1 = -2.0 * (double)0xFFFFFFFF; val = TruncateDouble(val); - return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? ULONG_MAX : (uint64_t)val; + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 0xFFFFFFFF : (uint64_t)val; #else const double two63 = 2147483648.0 * 4294967296.0; From fc0d127815f2240230e9b60cf4a5270d4a468ecc Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 13:26:13 -0700 Subject: [PATCH 32/40] Rolling back changes for double/float -> ulong --- src/coreclr/jit/codegenxarch.cpp | 8 ++-- src/coreclr/jit/emit.h | 5 --- src/coreclr/jit/instr.cpp | 7 +--- src/coreclr/jit/lowerxarch.cpp | 6 +-- src/coreclr/jit/morph.cpp | 4 -- src/coreclr/vm/jithelpers.cpp | 40 ------------------- .../src/System/Double.cs | 2 +- .../System.Private.CoreLib/src/System/Half.cs | 2 +- .../System/Runtime/InteropServices/NFloat.cs | 2 +- .../src/System/Single.cs | 2 +- .../out_of_range_fp_to_int_conversions.cpp | 14 ++++++- .../out_of_range_fp_to_int_conversions.cs | 16 +++++++- 12 files changed, 38 insertions(+), 70 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7789c25074863..8635d699c0fd9 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7461,10 +7461,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase, unless we have AVX512F - // accelerated conversions. - assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || - compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + // into a helper call by either front-end or lowering phase. 
+ assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. @@ -7477,7 +7475,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // Note that we need to specify dstType here so that it will determine // the size of destination integer register and also the rex.w prefix. genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); + instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType)); GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); genProduceReg(treeNode); } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e2b3b350963b5..9cab8e6fcea2f 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3891,11 +3891,6 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const return EA_32BYTE; } - case INS_vcvttss2usi64: - { - return EA_4BYTE; - } - case INS_movddup: { if (defaultSize == 64) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 43cd4ce2ddc17..c7f57bbccc719 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2282,8 +2282,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) { // AVX: For now we support only conversion from Int/Long -> float // AVX512: Supports following conversions - // srcType = float/double castToType = ulong - // srcType = ulong castToType = double + // srcType = ulong castToType = double/float switch (from) { @@ -2332,8 +2331,6 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return ins_Move_Extend(TYP_FLOAT, false); case TYP_DOUBLE: return INS_cvtss2sd; - case TYP_ULONG: - return INS_vcvttss2usi64; default: unreached(); } @@ -2346,8 +2343,6 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return INS_cvttsd2si; case TYP_LONG: 
return INS_cvttsd2si; - case TYP_ULONG: - return INS_vcvttsd2usi; case TYP_FLOAT: return INS_cvtsd2ss; case TYP_DOUBLE: diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 94632d1411e79..79621de60c68b 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree) // srcType = float/double castToType = * and overflow detecting cast // Reason: must be converted to a helper call // srcType = float/double, castToType = ulong - // Reason: must be converted to a helper call unless we have AVX512F + // Reason: must be converted to a helper call // srcType = uint castToType = float/double // Reason: uint -> float/double = uint -> long -> float/double // srcType = ulong castToType = float // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { - assert(!tree->gtOverflow() || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(castToType != TYP_ULONG || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(!tree->gtOverflow()); + assert(castToType != TYP_ULONG); } else if (srcType == TYP_UINT) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index dd63ecca7494d..1f224d954a126 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -390,10 +390,6 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #endif // !TARGET_AMD64 case TYP_ULONG: -#ifdef TARGET_AMD64 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - return nullptr; -#endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 3e9466efa42ed..b7ae2b858aadd 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -572,38 +572,6 @@ FORCEINLINE INT64 FastDbl2Lng(double val) #endif } -//------------------------------------------------------------------------ -// 
TruncateDouble: helper function to truncate double -// numbers to nearest integer (round towards zero). -// -// Arguments: -// val - double number to be truncated. -// -// Return Value: -// double: truncated number (rounded towards zero) -// -double TruncateDouble(double val) -{ - LIMITED_METHOD_CONTRACT; - int64_t *dintVal = (int64_t *)&val; - - uint64_t uintVal = (uint64_t)*dintVal; - int exponent = (int)((uintVal >> 52) & 0x7FF); - if (exponent < 1023) - { - uintVal = uintVal & 0x8000000000000000ull; - } - else if (exponent < 1075) - { - uintVal = uintVal & (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023))); - } - int64_t intVal = (int64_t)uintVal; - double *doubleVal = (double *)&intVal; - double retVal = *doubleVal; - - return retVal; -} - /*********************************************************************/ HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val) { @@ -621,13 +589,6 @@ HCIMPLEND HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; -#if defined(TARGET_X86) || defined(TARGET_AMD64) - - const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - val = TruncateDouble(val); - return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; - -#else const double two63 = 2147483648.0 * 4294967296.0; UINT64 ret; if (val < two63) { @@ -638,7 +599,6 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; -#endif // TARGET_X86 || TARGET_AMD64 } HCIMPLEND diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs index c459a648b44a6..aaa637ae02a6f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Double.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs @@ -1400,7 +1400,7 @@ private static bool TryConvertTo(double value, [MaybeNullWhen(false)] ou { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? 
unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs index 07a7027359487..6415acc9c798e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Half.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs @@ -1883,7 +1883,7 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out else if (typeof(TOther) == typeof(nuint)) { nuint actualResult = (value == PositiveInfinity) ? nuint.MaxValue : - (value <= Zero || IsNaN(value)) ? nuint.MinValue : (nuint)value; + (value <= Zero) ? nuint.MinValue : (nuint)value; result = (TOther)(object)actualResult; return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs index d8f35715ff0bf..e5645feb21ffa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs @@ -1754,7 +1754,7 @@ private static bool TryConvertTo(NFloat value, [MaybeNullWhen(false)] ou return true; #else nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue) ? 
unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs index 3219e9b27d585..42d63de43279b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Single.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs @@ -1380,7 +1380,7 @@ private static bool TryConvertTo(float value, [MaybeNullWhen(false)] out { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index 3890fcac11a3d..bffa2bf179f29 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -137,7 +137,6 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver return ((x != x) || (x < INT64_MIN) || (x >= uint64_max_plus_1)) ? (uint64_t)INT64_MIN : (x < 0) ? (uint64_t)(int64_t)x : (uint64_t)x; case CONVERT_SENTINEL: - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x; case CONVERT_SATURATING: @@ -154,7 +153,18 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - + + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: + if (x < int64_max_plus_1) + { + return (x < INT64_MIN) ? 
(uint64_t)INT64_MIN : (uint64_t)(int64_t)x; + } + else + { + x -= int64_max_plus_1; + x = trunc(x); + return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000); + } case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index e2be91c974fec..ef9a9877de4d0 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -183,7 +183,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (Double.IsNaN(x) || (x < long.MinValue) || (x >= ullong_max_plus_1)) ? unchecked((ulong)long.MinValue): (x < 0) ? (ulong)(long)x: (ulong)x; case FPtoIntegerConversionType.CONVERT_SENTINEL: - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)x; case FPtoIntegerConversionType.CONVERT_SATURATING: @@ -200,6 +199,21 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } + + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: + + if (x < two63) + { + return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x; + } + else + { + // (double)LLONG_MAX cannot be represented exactly as double + const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1); + x -= two63; + x = Math.Truncate(x); + return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? 
long.MinValue : (long)x) + (0x8000000000000000); + } } From 9b56b8620afc4918e9fec27dab804e45ecb90281 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 14:30:07 -0700 Subject: [PATCH 33/40] Rolling back changes for double/float -> ulong --- src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 4909d6624c71c..9ad553ce15647 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -18,46 +18,8 @@ FORCEINLINE int64_t FastDbl2Lng(double val) #endif } -//------------------------------------------------------------------------ -// TruncateDouble: helper function to truncate double -// numbers to nearest integer (round towards zero). -// -// Arguments: -// val - double number to be truncated. -// -// Return Value: -// double: truncated number (rounded towards zero) -// -double TruncateDouble(double val) -{ - int64_t *dintVal = (int64_t *)&val; - - uint64_t uintVal = (uint64_t)*dintVal; - int exponent = (int)((uintVal >> 52) & 0x7FF); - if (exponent < 1023) - { - uintVal = uintVal & 0x8000000000000000ull; - } - else if (exponent < 1075) - { - uintVal = uintVal & (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023))); - } - int64_t intVal = (int64_t)uintVal; - double *doubleVal = (double *)&intVal; - double retVal = *doubleVal; - - return retVal; -} - EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) { -#if defined(TARGET_X86) || defined(TARGET_AMD64) - - const double uint64_max_plus_1 = -2.0 * (double)0xFFFFFFFF; - val = TruncateDouble(val); - return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 
0xFFFFFFFF : (uint64_t)val; - -#else const double two63 = 2147483648.0 * 4294967296.0; uint64_t ret; if (val < two63) @@ -70,7 +32,6 @@ EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; -#endif // TARGET_X86 || TARGET_AMD64 } #undef min From 930c4731e27b957110634015f695a50ed9e9c654 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 15:21:17 -0700 Subject: [PATCH 34/40] Reverting ouf_or_range_fp_conversion to original version --- .../Directed/Convert/out_of_range_fp_to_int_conversions.cpp | 1 - .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index bffa2bf179f29..7a7df4e173355 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -165,7 +165,6 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver x = trunc(x); return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000); } - case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index ef9a9877de4d0..ca61c9d26af6d 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -213,8 +213,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) x -= two63; x = Math.Truncate(x); return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? 
long.MinValue : (long)x) + (0x8000000000000000); - } - + } } return 0; From b2ae11062e2473bdcfc7ac86ab200811c045b3ce Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 15:24:22 -0700 Subject: [PATCH 35/40] Reverting ouf_or_range_fp_conversion to original version --- .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp | 2 +- .../JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index 7a7df4e173355..eaf7f2fa1a9da 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -164,7 +164,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver x -= int64_max_plus_1; x = trunc(x); return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000); - } + } case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index ca61c9d26af6d..5b78783c09e4c 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -213,7 +213,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) x -= two63; x = Math.Truncate(x); return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? 
long.MinValue : (long)x) + (0x8000000000000000); - } + } } return 0; From 0439e289a3967c5b9d7a24ed5b26bc548cc581a5 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 15:26:26 -0700 Subject: [PATCH 36/40] Reverting jithelpers.cpp to original versino --- src/coreclr/vm/jithelpers.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index b7ae2b858aadd..1b697efb20b87 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -589,6 +589,7 @@ HCIMPLEND HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; + const double two63 = 2147483648.0 * 4294967296.0; UINT64 ret; if (val < two63) { From 2166ae53f847314b6ba06991497c116529316bfc Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 30 Jun 2023 15:26:51 -0700 Subject: [PATCH 37/40] Reverting jithelpers.cpp to original version --- src/coreclr/vm/jithelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 1b697efb20b87..d4ce2c9aa69ac 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -589,7 +589,7 @@ HCIMPLEND HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; - + const double two63 = 2147483648.0 * 4294967296.0; UINT64 ret; if (val < two63) { From e2a6029225bf4a575ceafb527cbda5e47cc62dd5 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 5 Jul 2023 10:18:14 -0700 Subject: [PATCH 38/40] Changind comments, reverting asserts, skipping to change node for cast --- src/coreclr/jit/importer.cpp | 12 ------------ src/coreclr/jit/instr.cpp | 3 ++- src/coreclr/jit/lowerxarch.cpp | 4 ++-- src/coreclr/jit/morph.cpp | 2 +- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 9c38a69d6854d..de914ea0bdfdc 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -7883,18 +7883,6 @@ void 
Compiler::impImportBlockCode(BasicBlock* block) || (impStackTop().val->TypeGet() == TYP_BYREF) #endif ; -#ifdef TARGET_AMD64 - // If AVX512 is present and we are not checking for overflow, we do not need - // a large node. In this case, we will not fallback to a helper function but - // will use the intrinsic instead. This is done for all long/ulong to floating - // point conversions. Hence setting the callNode to false to - // avoid generating a large node. - if (callNode && !ovfl && varTypeIsLong(impStackTop().val) && - compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - callNode = false; - } -#endif // TARGET_AMD64 } else { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index c7f57bbccc719..132d09b518b13 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2280,7 +2280,8 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type) // instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) { - // AVX: For now we support only conversion from Int/Long -> float + // AVX: Supports following conversions + // srcType = int16/int64 castToType = float // AVX512: Supports following conversions // srcType = ulong castToType = double/float diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 79621de60c68b..80b7c94b9dae3 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -802,8 +802,8 @@ void Lowering::LowerCast(GenTree* tree) // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { - assert(!tree->gtOverflow()); - assert(castToType != TYP_ULONG); + noway_assert(!tree->gtOverflow()); + noway_assert(castToType != TYP_ULONG); } else if (srcType == TYP_UINT) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1f224d954a126..6ea83ce9b591f 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -301,7 +301,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) 
// have the 2 GT_CAST nodes in the tree and we are combining them below. if (oper->OperIs(GT_CAST)) { - GenTreeCast* innerCast = static_cast(oper); + GenTreeCast* innerCast = oper->AsCast(); if (innerCast->IsUnsigned()) { From 715fc7e357dce0f6c994fad6ae512d25188f0d22 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 14 Jul 2023 13:43:23 -0700 Subject: [PATCH 39/40] addressing review comments --- src/coreclr/jit/morph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6ea83ce9b591f..3d8f13a007519 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -312,13 +312,13 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) innerSrcType = varTypeToUnsigned(innerSrcType); // Check if we are going from ulong->double->float - if (innerSrcType == TYP_ULONG && innerDstType == TYP_DOUBLE && dstType == TYP_FLOAT) + if ((innerSrcType == TYP_ULONG) && (innerDstType == TYP_DOUBLE) && (dstType == TYP_FLOAT)) { if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { // One optimized (combined) cast here - tree = gtNewCastNode(TYP_ULONG, innerOper, true, TYP_FLOAT); - tree->gtType = TYP_FLOAT; + tree = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT); + //tree->gtType = TYP_FLOAT; return fgMorphTree(tree); } } From dc6e41ac1a2f2e486aa0c702329b807d3a08c75d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 14 Jul 2023 19:11:47 -0700 Subject: [PATCH 40/40] Update src/coreclr/jit/morph.cpp --- src/coreclr/jit/morph.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3d8f13a007519..22bddbd9a5e83 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -317,8 +317,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { // One optimized (combined) cast here - tree = gtNewCastNode(TYP_FLOAT, innerOper, 
true, TYP_FLOAT); - //tree->gtType = TYP_FLOAT; + tree = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT); return fgMorphTree(tree); } }