From 7d7e5d105fe5bde1dba2550ed29dc73fd6ebbbd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Fri, 23 Feb 2024 16:21:06 +0100 Subject: [PATCH 01/14] Merging with main Rewrite math jit helpers to managed code + Fix asserts. --- src/coreclr/inc/corinfo.h | 10 +- src/coreclr/inc/jithelpers.h | 34 +- src/coreclr/jit/gentree.cpp | 1 + src/coreclr/jit/morph.cpp | 4 +- src/coreclr/jit/utils.cpp | 4 - src/coreclr/jit/valuenum.cpp | 12 - src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 88 +--- .../Runtime/CompilerHelpers/MathHelpers.cs | 245 +--------- .../Common/TypeSystem/IL/HelperExtensions.cs | 8 + .../ILCompiler.Compiler/Compiler/JitHelper.cs | 62 ++- .../IL/ILImporter.Scanner.cs | 48 +- src/coreclr/vm/corelib.h | 23 +- src/coreclr/vm/ecall.cpp | 56 +++ src/coreclr/vm/i386/jithelp.S | 81 ---- src/coreclr/vm/i386/jithelp.asm | 212 --------- src/coreclr/vm/i386/jitinterfacex86.cpp | 45 -- src/coreclr/vm/jithelpers.cpp | 429 ++---------------- src/coreclr/vm/jitinterface.cpp | 15 +- src/coreclr/vm/jitinterface.h | 11 - .../System.Private.CoreLib/src/System/Math.cs | 246 ++++++++++ .../src/System/MathF.cs | 23 + 21 files changed, 525 insertions(+), 1132 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 5fad5e4b2429e..b4417e60af4d5 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -387,18 +387,18 @@ enum CorInfoHelpFunc CORINFO_HELP_ULMOD, CORINFO_HELP_LNG2DBL, // Convert a signed int64 to a double CORINFO_HELP_ULNG2DBL, // Convert a unsigned int64 to a double - CORINFO_HELP_DBL2INT, + CORINFO_HELP_DBL2INT, // unused CORINFO_HELP_DBL2INT_OVF, CORINFO_HELP_DBL2LNG, CORINFO_HELP_DBL2LNG_OVF, - CORINFO_HELP_DBL2UINT, + CORINFO_HELP_DBL2UINT, // unused CORINFO_HELP_DBL2UINT_OVF, CORINFO_HELP_DBL2ULNG, CORINFO_HELP_DBL2ULNG_OVF, CORINFO_HELP_FLTREM, CORINFO_HELP_DBLREM, - CORINFO_HELP_FLTROUND, - CORINFO_HELP_DBLROUND, + CORINFO_HELP_FLTROUND, // unused + CORINFO_HELP_DBLROUND, // unused /* 
Allocating a new object. Always use ICorClassInfo::getNewHelper() to decide which is the right helper to use to allocate an object of a given type. */ @@ -2061,7 +2061,7 @@ class ICorStaticInfo // Example of a scenario addressed by notifyMethodInfoUsage: // 1) Crossgen (with --opt-cross-module=MyLib) attempts to inline a call from MyLib.dll into MyApp.dll // and realizes that the call always throws. - // 2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is + // 2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is // replaced with a breakpoint instruction that is expected to be unreachable. // 3) MyLib is updated to a new version so it's no longer within the same version bubble with MyApp.dll // and the new version of the call no longer throws and does some work. diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index a0982f3ac6520..e0f65ff5de908 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -35,8 +35,6 @@ JITHELPER(CORINFO_HELP_UDIV, JIT_UDiv, CORINFO_HELP_SIG_8_STACK) JITHELPER(CORINFO_HELP_UMOD, JIT_UMod, CORINFO_HELP_SIG_8_STACK) - // CORINFO_HELP_DBL2INT, CORINFO_HELP_DBL2UINT, and CORINFO_HELP_DBL2LONG get - // patched for CPUs that support SSE2 (P4 and above). 
#ifndef TARGET_64BIT JITHELPER(CORINFO_HELP_LLSH, JIT_LLsh, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_LRSH, JIT_LRsh, CORINFO_HELP_SIG_REG_ONLY) @@ -47,26 +45,26 @@ JITHELPER(CORINFO_HELP_LRSZ, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) #endif // TARGET_64BIT JITHELPER(CORINFO_HELP_LMUL, JIT_LMul, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_LMUL_OVF, JIT_LMulOvf, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_ULMUL_OVF, JIT_ULMulOvf, CORINFO_HELP_SIG_16_STACK) + DYNAMICJITHELPER(CORINFO_HELP_LMUL_OVF, NULL, CORINFO_HELP_SIG_16_STACK) + DYNAMICJITHELPER(CORINFO_HELP_ULMUL_OVF, NULL, CORINFO_HELP_SIG_16_STACK) JITHELPER(CORINFO_HELP_LDIV, JIT_LDiv, CORINFO_HELP_SIG_16_STACK) JITHELPER(CORINFO_HELP_LMOD, JIT_LMod, CORINFO_HELP_SIG_16_STACK) JITHELPER(CORINFO_HELP_ULDIV, JIT_ULDiv, CORINFO_HELP_SIG_16_STACK) JITHELPER(CORINFO_HELP_ULMOD, JIT_ULMod, CORINFO_HELP_SIG_16_STACK) JITHELPER(CORINFO_HELP_LNG2DBL, JIT_Lng2Dbl, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_ULNG2DBL, JIT_ULng2Dbl, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2INT, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2INT_OVF, JIT_Dbl2IntOvf, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2LNG_OVF, JIT_Dbl2LngOvf, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2UINT_OVF, JIT_Dbl2UIntOvf, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2ULNG, JIT_Dbl2ULng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2ULNG_OVF, JIT_Dbl2ULngOvf, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_FLTREM, JIT_FltRem, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBLREM, JIT_DblRem, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_FLTROUND, JIT_FloatRound, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBLROUND, JIT_DoubleRound, CORINFO_HELP_SIG_16_STACK) + 
DYNAMICJITHELPER(CORINFO_HELP_ULNG2DBL, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2INT, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2INT_OVF, NULL, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DBL2LNG, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_FLTREM, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBLREM, NULL, CORINFO_HELP_SIG_16_STACK) + DYNAMICJITHELPER(CORINFO_HELP_FLTROUND, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_DBLROUND, NULL, CORINFO_HELP_SIG_16_STACK) // Allocating a new object JITHELPER(CORINFO_HELP_NEWFAST, JIT_New, CORINFO_HELP_SIG_REG_ONLY) @@ -203,7 +201,7 @@ JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE, JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR, JIT_GetSharedGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR, JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedNonGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedGCThreadStaticBaseOptimized, 
CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 489d3f42c4c0a..ffb461ec4d829 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -5969,6 +5969,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) switch (tree->AsIntrinsic()->gtIntrinsicName) { case NI_System_Math_Atan2: + case NI_System_Math_FMod: case NI_System_Math_Pow: // These math intrinsics are actually implemented by user calls. Increase the // Sethi 'complexity' by two to reflect the argument register requirement. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 43fb58c0f8837..0e7edf4f15902 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -377,7 +377,9 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #if defined(TARGET_ARM) || defined(TARGET_AMD64) return nullptr; #else // TARGET_X86 - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); + oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); + tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT); + return fgMorphTree(tree); #endif // TARGET_X86 case TYP_LONG: diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index aed8cda7c24df..5b9e74e91b242 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1536,14 +1536,10 @@ void HelperCallProperties::init() case CORINFO_HELP_LMUL: case CORINFO_HELP_LNG2DBL: case CORINFO_HELP_ULNG2DBL: - case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2LNG: - case CORINFO_HELP_DBL2UINT: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_FLTREM: case CORINFO_HELP_DBLREM: - case CORINFO_HELP_FLTROUND: - case CORINFO_HELP_DBLROUND: isPure = true; noThrow = true; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 79cc554835540..4a8ca85aa3e58 100644 --- 
a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12759,11 +12759,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) srcIsUnsigned = true; break; - case CORINFO_HELP_DBL2INT: - castToType = TYP_INT; - castFromType = TYP_DOUBLE; - break; - case CORINFO_HELP_DBL2INT_OVF: castToType = TYP_INT; castFromType = TYP_DOUBLE; @@ -12781,11 +12776,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) hasOverflowCheck = true; break; - case CORINFO_HELP_DBL2UINT: - castToType = TYP_UINT; - castFromType = TYP_DOUBLE; - break; - case CORINFO_HELP_DBL2UINT_OVF: castToType = TYP_UINT; castFromType = TYP_DOUBLE; @@ -13091,11 +13081,9 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) { case CORINFO_HELP_LNG2DBL: case CORINFO_HELP_ULNG2DBL: - case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2INT_OVF: case CORINFO_HELP_DBL2LNG: case CORINFO_HELP_DBL2LNG_OVF: - case CORINFO_HELP_DBL2UINT: case CORINFO_HELP_DBL2UINT_OVF: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_DBL2ULNG_OVF: diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 930c200a34441..6491813e3ed4e 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -5,78 +5,13 @@ #include "CommonMacros.h" #include "rhassert.h" -// -// Floating point and 64-bit integer math helpers. -// - -EXTERN_C uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) -{ - const double two63 = 2147483648.0 * 4294967296.0; - uint64_t ret; - if (val < two63) - { - ret = (int64_t)(val); - } - else - { - // subtract 0x8000000000000000, do the convert then add it back again - ret = (int64_t)(val - two63) + I64(0x8000000000000000); - } - return ret; -} - #undef min #undef max #include -EXTERN_C float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor) -{ - // - // From the ECMA standard: - // - // If [divisor] is zero or [dividend] is infinity - // the result is NaN. 
- // If [divisor] is infinity, - // the result is [dividend] (negated for -infinity***). - // - // ***"negated for -infinity" has been removed from the spec - // - - if (divisor==0 || !std::isfinite(dividend)) - { - return -nanf(""); - } - else if (!std::isfinite(divisor) && !std::isnan(divisor)) - { - return dividend; - } - // else... - return fmodf(dividend,divisor); -} - -EXTERN_C double REDHAWK_CALLCONV RhpDblRem(double dividend, double divisor) -{ - // - // From the ECMA standard: - // - // If [divisor] is zero or [dividend] is infinity - // the result is NaN. - // If [divisor] is infinity, - // the result is [dividend] (negated for -infinity***). - // - // ***"negated for -infinity" has been removed from the spec - // - if (divisor==0 || !std::isfinite(dividend)) - { - return -nan(""); - } - else if (!std::isfinite(divisor) && !std::isnan(divisor)) - { - return dividend; - } - // else... - return(fmod(dividend,divisor)); -} +// +// Floating point and 64-bit integer math helpers. +// #ifdef HOST_ARM EXTERN_C int32_t REDHAWK_CALLCONV RhpIDiv(int32_t i, int32_t j) @@ -152,22 +87,7 @@ EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) return (int64_t)val; } -EXTERN_C int32_t REDHAWK_CALLCONV RhpDbl2Int(double val) -{ - return (int32_t)val; -} - -EXTERN_C uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val) -{ - return (uint32_t)val; -} - -EXTERN_C double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val) -{ - return (double)val; -} - -EXTERN_C double REDHAWK_CALLCONV RhpULng2Dbl(uint64_t val) +EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val) { return (double)val; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs index 7175ea9c00cbe..b6930717ab0b7 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs +++ 
b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs @@ -5,8 +5,6 @@ using System.Runtime; using System.Runtime.CompilerServices; -using Internal.Runtime; - namespace Internal.Runtime.CompilerHelpers { /// @@ -16,136 +14,10 @@ namespace Internal.Runtime.CompilerHelpers internal static class MathHelpers { #if !TARGET_64BIT - // - // 64-bit checked multiplication for 32-bit platforms - // - private const string RuntimeLibrary = "*"; - // Helper to multiply two 32-bit uints - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ulong Mul32x32To64(uint a, uint b) - { - return a * (ulong)b; - } - - // Helper to get high 32-bit of 64-bit int - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint Hi32Bits(long a) - { - return (uint)(a >> 32); - } - - // Helper to get high 32-bit of 64-bit int - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint Hi32Bits(ulong a) - { - return (uint)(a >> 32); - } - - [RuntimeExport("LMulOvf")] - public static long LMulOvf(long i, long j) - { - long ret; - - // Remember the sign of the result - int sign = (int)(Hi32Bits(i) ^ Hi32Bits(j)); - - // Convert to unsigned multiplication - if (i < 0) i = -i; - if (j < 0) j = -j; - - // Get the upper 32 bits of the numbers - uint val1High = Hi32Bits(i); - uint val2High = Hi32Bits(j); - - ulong valMid; - - if (val1High == 0) - { - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val2High, (uint)i); - } - else - { - if (val2High != 0) - goto ThrowExcep; - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val1High, (uint)j); - } - - // See if any bits after bit 32 are set - if (Hi32Bits(valMid) != 0) - goto ThrowExcep; - - ret = (long)(Mul32x32To64((uint)i, (uint)j) + (valMid << 32)); - - // check for overflow - if (Hi32Bits(ret) < (uint)valMid) - goto ThrowExcep; - - if (sign >= 0) - { - // have we spilled into the sign bit? 
- if (ret < 0) - goto ThrowExcep; - } - else - { - ret = -ret; - // have we spilled into the sign bit? - if (ret > 0) - goto ThrowExcep; - } - return ret; - - ThrowExcep: - return ThrowLngOvf(); - } - - [RuntimeExport("ULMulOvf")] - public static ulong ULMulOvf(ulong i, ulong j) - { - ulong ret; - - // Get the upper 32 bits of the numbers - uint val1High = Hi32Bits(i); - uint val2High = Hi32Bits(j); - - ulong valMid; - - if (val1High == 0) - { - if (val2High == 0) - return Mul32x32To64((uint)i, (uint)j); - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val2High, (uint)i); - } - else - { - if (val2High != 0) - goto ThrowExcep; - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val1High, (uint)j); - } - - // See if any bits after bit 32 are set - if (Hi32Bits(valMid) != 0) - goto ThrowExcep; - - ret = Mul32x32To64((uint)i, (uint)j) + (valMid << 32); - - // check for overflow - if (Hi32Bits(ret) < (uint)valMid) - goto ThrowExcep; - return ret; - - ThrowExcep: - return ThrowULngOvf(); - } - [RuntimeImport(RuntimeLibrary, "RhpULMod")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern ulong RhpULMod(ulong i, ulong j); public static ulong ULMod(ulong i, ulong j) @@ -157,7 +29,7 @@ public static ulong ULMod(ulong i, ulong j) } [RuntimeImport(RuntimeLibrary, "RhpLMod")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern long RhpLMod(long i, long j); public static long LMod(long i, long j) @@ -171,7 +43,7 @@ public static long LMod(long i, long j) } [RuntimeImport(RuntimeLibrary, "RhpULDiv")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern ulong RhpULDiv(ulong i, ulong j); public static ulong ULDiv(ulong i, ulong j) @@ -183,7 +55,7 @@ public static ulong ULDiv(ulong i, ulong j) } 
[RuntimeImport(RuntimeLibrary, "RhpLDiv")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern long RhpLDiv(long i, long j); public static long LDiv(long i, long j) @@ -196,94 +68,9 @@ public static long LDiv(long i, long j) return RhpLDiv(i, j); } - [MethodImpl(MethodImplOptions.NoInlining)] - private static long ThrowLngDivByZero() - { - throw new DivideByZeroException(); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static ulong ThrowULngDivByZero() - { - throw new DivideByZeroException(); - } -#endif // TARGET_64BIT - - [RuntimeExport("Dbl2IntOvf")] - public static int Dbl2IntOvf(double val) - { - const double two31 = 2147483648.0; - - // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return unchecked((int)val); - - return ThrowIntOvf(); - } - - [RuntimeExport("Dbl2UIntOvf")] - public static uint Dbl2UIntOvf(double val) - { - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < 4294967296.0) - return unchecked((uint)val); - - return ThrowUIntOvf(); - } - - [RuntimeExport("Dbl2LngOvf")] - public static long Dbl2LngOvf(double val) - { - const double two63 = 2147483648.0 * 4294967296.0; - - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. 
- if (val > -two63 - 0x402 && val < two63) - return unchecked((long)val); - - return ThrowLngOvf(); - } - - [RuntimeExport("Dbl2ULngOvf")] - public static ulong Dbl2ULngOvf(double val) - { - const double two64 = 2.0 * 2147483648.0 * 4294967296.0; - - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < two64) - return unchecked((ulong)val); - - return ThrowULngOvf(); - } - - [RuntimeExport("Flt2IntOvf")] - public static int Flt2IntOvf(float val) - { - const double two31 = 2147483648.0; - - // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return ((int)val); - - return ThrowIntOvf(); - } - - [RuntimeExport("Flt2LngOvf")] - public static long Flt2LngOvf(float val) - { - const double two63 = 2147483648.0 * 4294967296.0; - - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. - if (val > -two63 - 0x402 && val < two63) - return ((long)val); - - return ThrowIntOvf(); - } - #if TARGET_ARM [RuntimeImport(RuntimeLibrary, "RhpIDiv")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern int RhpIDiv(int i, int j); public static int IDiv(int i, int j) @@ -297,7 +84,7 @@ public static int IDiv(int i, int j) } [RuntimeImport(RuntimeLibrary, "RhpUDiv")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern uint RhpUDiv(uint i, uint j); public static long UDiv(uint i, uint j) @@ -309,7 +96,7 @@ public static long UDiv(uint i, uint j) } [RuntimeImport(RuntimeLibrary, "RhpIMod")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern int RhpIMod(int i, int j); public static int IMod(int i, int j) @@ -323,7 +110,7 @@ public static int IMod(int i, int j) } 
[RuntimeImport(RuntimeLibrary, "RhpUMod")] - [MethodImplAttribute(MethodImplOptions.InternalCall)] + [MethodImpl(MethodImplOptions.InternalCall)] private static extern uint RhpUMod(uint i, uint j); public static long UMod(uint i, uint j) @@ -339,32 +126,31 @@ public static long UMod(uint i, uint j) // Matching return types of throw helpers enables tailcalling them. It improves performance // of the hot path because of it does not need to raise full stackframe. // - [MethodImpl(MethodImplOptions.NoInlining)] - private static int ThrowIntOvf() + private static long ThrowLngOvf() { throw new OverflowException(); } [MethodImpl(MethodImplOptions.NoInlining)] - private static uint ThrowUIntOvf() + private static long ThrowLngDivByZero() { - throw new OverflowException(); + throw new DivideByZeroException(); } [MethodImpl(MethodImplOptions.NoInlining)] - private static long ThrowLngOvf() + private static ulong ThrowULngDivByZero() { - throw new OverflowException(); + throw new DivideByZeroException(); } +#if TARGET_ARM [MethodImpl(MethodImplOptions.NoInlining)] - private static ulong ThrowULngOvf() + private static int ThrowIntOvf() { throw new OverflowException(); } -#if TARGET_ARM [MethodImpl(MethodImplOptions.NoInlining)] private static int ThrowIntDivByZero() { @@ -377,5 +163,6 @@ private static uint ThrowUIntDivByZero() throw new DivideByZeroException(); } #endif // TARGET_ARM +#endif // TARGET_64BIT } } diff --git a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs index 4ccaff2d6dd9f..fa24787613574 100644 --- a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs +++ b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs @@ -7,6 +7,7 @@ using Internal.IL.Stubs; using Debug = System.Diagnostics.Debug; +using System.Xml.Linq; namespace Internal.IL { @@ -40,6 +41,13 @@ public static MethodDesc GetOptionalHelperEntryPoint(this TypeSystemContext cont return helperMethod; } + public static 
MethodDesc GetHelperEntryPoint(this TypeSystemContext context, string typeNamespace, string typeName, string methodName) + { + MetadataType helperType = context.SystemModule.GetKnownType(typeNamespace, typeName); + MethodDesc helperMethod = helperType.GetKnownMethod(methodName, null); + return helperMethod; + } + /// /// Emits a call to a throw helper. Use this to emit calls to static parameterless methods that don't return. /// The advantage of using this extension method is that you don't have to deal with what code to emit after diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index cf1d04ca666af..8d5b1357d9e3e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -164,54 +164,66 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, methodDesc = context.GetHelperEntryPoint("LdTokenHelpers", "GetRuntimeFieldHandle"); break; - case ReadyToRunHelper.Lng2Dbl: - mangledName = "RhpLng2Dbl"; + case ReadyToRunHelper.LMulOfv: + methodDesc = context.GetHelperEntryPoint("System", "Math", "LongMultiplyOverflow"); break; - case ReadyToRunHelper.ULng2Dbl: - mangledName = "RhpULng2Dbl"; + case ReadyToRunHelper.ULMulOvf: + methodDesc = context.GetHelperEntryPoint("System", "Math", "ULongMultiplyOverflow"); break; - case ReadyToRunHelper.Dbl2Lng: - mangledName = "RhpDbl2Lng"; + case ReadyToRunHelper.ULng2Dbl: + methodDesc = context.GetHelperEntryPoint("System", "Math", "ULongToDouble"); break; case ReadyToRunHelper.Dbl2ULng: - mangledName = "RhpDbl2ULng"; - break; - case ReadyToRunHelper.Dbl2Int: - mangledName = "RhpDbl2Int"; - break; - case ReadyToRunHelper.Dbl2UInt: - mangledName = "RhpDbl2UInt"; + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong"); break; case ReadyToRunHelper.Dbl2IntOvf: - methodDesc = 
context.GetHelperEntryPoint("MathHelpers", "Dbl2IntOvf"); + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow"); break; case ReadyToRunHelper.Dbl2UIntOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2UIntOvf"); + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToUIntOverflow"); break; case ReadyToRunHelper.Dbl2LngOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2LngOvf"); + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToLongOverflow"); break; case ReadyToRunHelper.Dbl2ULngOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2ULngOvf"); + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULongOverflow"); break; case ReadyToRunHelper.DblRem: - mangledName = "RhpDblRem"; + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleReminder"); break; case ReadyToRunHelper.FltRem: - mangledName = "RhpFltRem"; + methodDesc = context.GetHelperEntryPoint("System", "MathF", "FloatReminder"); break; - case ReadyToRunHelper.LMul: - mangledName = "RhpLMul"; + case ReadyToRunHelper.Dbl2Int: + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt"); break; - case ReadyToRunHelper.LMulOfv: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "LMulOvf"); + case ReadyToRunHelper.Dbl2UInt: + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToUInt"); break; - case ReadyToRunHelper.ULMulOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "ULMulOvf"); + case ReadyToRunHelper.DblRound: + DefType doubleType = context.GetWellKnownType(WellKnownType.Double); + methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("Round", + new MethodSignature(MethodSignatureFlags.Static, 0, doubleType, [doubleType])); + break; + case ReadyToRunHelper.FltRound: + DefType floatType = context.GetWellKnownType(WellKnownType.Single); + methodDesc = context.SystemModule.GetKnownType("System", 
"MathF").GetKnownMethod("Round", + new MethodSignature(MethodSignatureFlags.Static, 0, floatType, [floatType])); + break; + + case ReadyToRunHelper.Lng2Dbl: + mangledName = "RhpLng2Dbl"; + break; + case ReadyToRunHelper.Dbl2Lng: + mangledName = "RhpDbl2Lng"; + break; + + case ReadyToRunHelper.LMul: + mangledName = "RhpLMul"; break; case ReadyToRunHelper.Mod: diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs index 7ae867370bdf4..e152de9cb404a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System; using Internal.TypeSystem; using Internal.ReadyToRunConstants; @@ -9,6 +10,7 @@ using Debug = System.Diagnostics.Debug; using DependencyList = ILCompiler.DependencyAnalysisFramework.DependencyNodeCore.DependencyList; +using System.Reflection.Emit; #pragma warning disable IDE0060 @@ -1259,6 +1261,51 @@ private void ImportBinaryOperation(ILOpcode opcode) { _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ThrowDivZero), "_divbyzero"); } + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.DblRem), "rem"); + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.FltRem), "rem"); + break; + } + } + + private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool unsigned) + { + switch (wellKnownType) + { + case WellKnownType.SByte: + case WellKnownType.Int16: + case WellKnownType.Int32: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf"); + } + break; + case WellKnownType.Int64: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2LngOvf), "conv_i8_ovf"); + } + break; + case WellKnownType.Byte: + case 
WellKnownType.UInt16: + case WellKnownType.UInt32: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u4_ovf"); + } + break; + case WellKnownType.UInt64: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2ULngOvf), "conv_u8_ovf"); + } + else + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2ULng), "conv_u8"); + } + break; + case WellKnownType.Single: + case WellKnownType.Double: + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ULng2Dbl), "conv_r"); break; } } @@ -1388,7 +1435,6 @@ private static void ImportStoreIndirect(int token) { } private static void ImportStoreIndirect(TypeDesc type) { } private static void ImportShiftOperation(ILOpcode opcode) { } private static void ImportCompareOperation(ILOpcode opcode) { } - private static void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool unsigned) { } private static void ImportUnaryOperation(ILOpcode opCode) { } private static void ImportCpOpj(int token) { } private static void ImportCkFinite() { } diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index c52c58954165a..eef4d60b64b87 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -259,11 +259,28 @@ DEFINE_FIELD(DELEGATE, METHOD_PTR_AUX, _methodPtrAux) DEFINE_METHOD(DELEGATE, CONSTRUCT_DELEGATE, DelegateConstruct, IM_Obj_IntPtr_RetVoid) DEFINE_METHOD(DELEGATE, GET_INVOKE_METHOD, GetInvokeMethod, IM_RetIntPtr) -DEFINE_CLASS(INT128, System, Int128) -DEFINE_CLASS(UINT128, System, UInt128) +DEFINE_CLASS(INT128, System, Int128) +DEFINE_CLASS(UINT128, System, UInt128) + +DEFINE_CLASS(MATH, System, Math) +DEFINE_METHOD(MATH, LONG_MULTIPLY_OVERFLOW, LongMultiplyOverflow, NoSig) +DEFINE_METHOD(MATH, ULONG_MULTIPLY_OVERFLOW,ULongMultiplyOverflow, NoSig) +DEFINE_METHOD(MATH, ULONG_TO_DOUBLE, ULongToDouble, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_ULONG, DoubleToULong, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_INT_OVERFLOW, 
DoubleToIntOverflow, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_UINT_OVERFLOW,DoubleToUIntOverflow, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_LONG_OVERFLOW,DoubleToLongOverflow, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverflow, NoSig) +DEFINE_METHOD(MATH, DOUBLE_REMINDER, DoubleReminder, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_INT, DoubleToInt, NoSig) +DEFINE_METHOD(MATH, DOUBLE_TO_UINT, DoubleToUInt, NoSig) +DEFINE_METHOD(MATH, ROUND, Round, SM_Dbl_RetDbl) + +DEFINE_CLASS(MATHF, System, MathF) +DEFINE_METHOD(MATHF, FLOAT_REMINDER, FloatReminder, NoSig) +DEFINE_METHOD(MATHF, ROUND, Round, SM_Flt_RetFlt) DEFINE_CLASS(DYNAMICMETHOD, ReflectionEmit, DynamicMethod) - DEFINE_CLASS(DYNAMICRESOLVER, ReflectionEmit, DynamicResolver) DEFINE_FIELD(DYNAMICRESOLVER, DYNAMIC_METHOD, m_method) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 7a9538d8ea7dd..fa8cf590c28d6 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -156,6 +156,62 @@ void ECall::PopulateManagedHelpers() pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMCOPY)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__LONG_MULTIPLY_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_LMUL_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ULONG_MULTIPLY_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_ULMUL_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ULONG_TO_DOUBLE)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_ULNG2DBL, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_ULONG)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2ULNG, pDest); + + pMD = 
CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_INT_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2INT_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_UINT_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2UINT_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_LONG_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2LNG_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_ULONG_OVERFLOW)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2ULNG_OVF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_REMINDER)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBLREM, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATHF__FLOAT_REMINDER)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_FLTREM, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_INT)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2INT, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__DOUBLE_TO_UINT)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATHF__ROUND)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_FLTROUND, pDest); } static CrstStatic gFCallLock; diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index c1da6f4dcb801..d027525202781 100644 
--- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -551,87 +551,6 @@ LOCAL_LABEL(LRszMORE32): ret LEAF_END JIT_LRsz, _TEXT -// *********************************************************************/ -// JIT_Dbl2LngP4x87 -// -// Purpose: -// converts a double to a long truncating toward zero (C semantics) -// -// uses stdcall calling conventions -// -// This code is faster on a P4 than the Dbl2Lng code above, but is -// slower on a PIII. Hence we choose this code when on a P4 or above. -// -LEAF_ENTRY JIT_Dbl2LngP4x87, _TEXT - // get some local space - sub esp, 8 - - #define arg1 [esp + 0x0C] - fld QWORD PTR arg1 // fetch arg - fnstcw WORD PTR arg1 // store FPCW - movzx eax, WORD PTR arg1 // zero extend - wide - or ah, 0x0C // turn on OE and DE flags - mov DWORD PTR [esp], eax // store new FPCW bits - fldcw WORD PTR [esp] // reload FPCW with new bits - fistp QWORD PTR [esp] // convert - - // reload FP result - mov eax, DWORD PTR [esp] - mov edx, DWORD PTR [esp + 4] - - // reload original FPCW value - fldcw WORD PTR arg1 - #undef arg1 - - // restore stack - add esp, 8 - - ret -LEAF_END JIT_Dbl2LngP4x87, _TEXT - -// *********************************************************************/ -// JIT_Dbl2LngSSE3 -// -// Purpose: -// converts a double to a long truncating toward zero (C semantics) -// -// uses stdcall calling conventions -// -// This code is faster than the above P4 x87 code for Intel processors -// equal or later than Core2 and Atom that have SSE3 support -// -LEAF_ENTRY JIT_Dbl2LngSSE3, _TEXT - // get some local space - sub esp, 8 - - fld QWORD PTR [esp + 0x0C] // fetch arg - fisttp QWORD PTR [esp] // convert - mov eax, DWORD PTR [esp] // reload FP result - mov edx, DWORD PTR [esp + 4] - - // restore stack - add esp, 8 - - ret -LEAF_END JIT_Dbl2LngSSE3, _TEXT - -// *********************************************************************/ -// JIT_Dbl2IntSSE2 -// -// Purpose: -// converts a double to a long truncating toward zero (C 
semantics) -// -// uses stdcall calling conventions -// -// This code is even faster than the P4 x87 code for Dbl2LongP4x87, -// but only returns a 32 bit value (only good for int). -// -LEAF_ENTRY JIT_Dbl2IntSSE2, _TEXT - movsd xmm0, [esp + 4] - cvttsd2si eax, xmm0 - ret -LEAF_END JIT_Dbl2IntSSE2, _TEXT - // *********************************************************************/ // JIT_StackProbe // diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 5f6890b8312e0..b7446327d47f9 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -36,11 +36,7 @@ JIT_LLsh TEXTEQU <_JIT_LLsh@0> JIT_LRsh TEXTEQU <_JIT_LRsh@0> JIT_LRsz TEXTEQU <_JIT_LRsz@0> JIT_LMul TEXTEQU <@JIT_LMul@16> -JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8> JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8> -JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8> -JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8> -JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8> JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4> JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0> JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0> @@ -635,181 +631,6 @@ LMul_hard: JIT_LMul ENDP -;*********************************************************************/ -; JIT_Dbl2LngOvf - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; with check for overflow -; -; uses stdcall calling conventions -; -PUBLIC JIT_Dbl2LngOvf -JIT_Dbl2LngOvf PROC - fnclex - fld qword ptr [esp+4] - push ecx - push ecx - fstp qword ptr [esp] - call JIT_Dbl2Lng - mov ecx,eax - fnstsw ax - test ax,01h - jnz Dbl2LngOvf_throw - mov eax,ecx - ret 8 - -Dbl2LngOvf_throw: - mov ECX, CORINFO_OverflowException_ASM - call JIT_InternalThrowFromHelper - ret 8 -JIT_Dbl2LngOvf ENDP - -;*********************************************************************/ -; JIT_Dbl2Lng - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses 
stdcall calling conventions -; -; note that changing the rounding mode is very expensive. This -; routine basiclly does the truncation semantics without changing -; the rounding mode, resulting in a win. -; -PUBLIC JIT_Dbl2Lng -JIT_Dbl2Lng PROC - fld qword ptr[ESP+4] ; fetch arg - lea ecx,[esp-8] - sub esp,16 ; allocate frame - and ecx,-8 ; align pointer on boundary of 8 - fld st(0) ; duplciate top of stack - fistp qword ptr[ecx] ; leave arg on stack, also save in temp - fild qword ptr[ecx] ; arg, round(arg) now on stack - mov edx,[ecx+4] ; high dword of integer - mov eax,[ecx] ; low dword of integer - test eax,eax - je integer_QNaN_or_zero - -arg_is_not_integer_QNaN: - fsubp st(1),st ; TOS=d-round(d), - ; { st(1)=st(1)-st & pop ST } - test edx,edx ; what's sign of integer - jns positive - ; number is negative - ; dead cycle - ; dead cycle - fstp dword ptr[ecx] ; result of subtraction - mov ecx,[ecx] ; dword of difference(single precision) - add esp,16 - xor ecx,80000000h - add ecx,7fffffffh ; if difference>0 then increment integer - adc eax,0 ; inc eax (add CARRY flag) - adc edx,0 ; propagate carry flag to upper bits - ret 8 - -positive: - fstp dword ptr[ecx] ;17-18 ; result of subtraction - mov ecx,[ecx] ; dword of difference (single precision) - add esp,16 - add ecx,7fffffffh ; if difference<0 then decrement integer - sbb eax,0 ; dec eax (subtract CARRY flag) - sbb edx,0 ; propagate carry flag to upper bits - ret 8 - -integer_QNaN_or_zero: - test edx,7fffffffh - jnz arg_is_not_integer_QNaN - fstp st(0) ;; pop round(arg) - fstp st(0) ;; arg - add esp,16 - ret 8 -JIT_Dbl2Lng ENDP - -;*********************************************************************/ -; JIT_Dbl2LngP4x87 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is faster on a P4 than the Dbl2Lng code above, but is -; slower on a PIII. Hence we choose this code when on a P4 or above. 
-; -PUBLIC JIT_Dbl2LngP4x87 -JIT_Dbl2LngP4x87 PROC -arg1 equ <[esp+0Ch]> - - sub esp, 8 ; get some local space - - fld qword ptr arg1 ; fetch arg - fnstcw word ptr arg1 ; store FPCW - movzx eax, word ptr arg1 ; zero extend - wide - or ah, 0Ch ; turn on OE and DE flags - mov dword ptr [esp], eax ; store new FPCW bits - fldcw word ptr [esp] ; reload FPCW with new bits - fistp qword ptr [esp] ; convert - mov eax, dword ptr [esp] ; reload FP result - mov edx, dword ptr [esp+4] ; - fldcw word ptr arg1 ; reload original FPCW value - - add esp, 8 ; restore stack - - ret 8 -JIT_Dbl2LngP4x87 ENDP - -;*********************************************************************/ -; JIT_Dbl2LngSSE3 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is faster than the above P4 x87 code for Intel processors -; equal or later than Core2 and Atom that have SSE3 support -; -.686P -.XMM -PUBLIC JIT_Dbl2LngSSE3 -JIT_Dbl2LngSSE3 PROC -arg1 equ <[esp+0Ch]> - - sub esp, 8 ; get some local space - - fld qword ptr arg1 ; fetch arg - fisttp qword ptr [esp] ; convert - mov eax, dword ptr [esp] ; reload FP result - mov edx, dword ptr [esp+4] - - add esp, 8 ; restore stack - - ret 8 -JIT_Dbl2LngSSE3 ENDP -.586 - -;*********************************************************************/ -; JIT_Dbl2IntSSE2 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is even faster than the P4 x87 code for Dbl2LongP4x87, -; but only returns a 32 bit value (only good for int). 
-; -.686P -.XMM -PUBLIC JIT_Dbl2IntSSE2 -JIT_Dbl2IntSSE2 PROC - $movsd xmm0, [esp+4] - cvttsd2si eax, xmm0 - ret 8 -JIT_Dbl2IntSSE2 ENDP -.586 - ;*********************************************************************/ ; This is the small write barrier thunk we use when we know the @@ -1212,39 +1033,6 @@ JIT_TailCallVSDLeave: JIT_TailCall ENDP - -;------------------------------------------------------------------------------ - -; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor) -@JIT_FltRem@8 proc public - fld dword ptr [esp+4] ; divisor - fld dword ptr [esp+8] ; dividend -fremloop: - fprem - fstsw ax - fwait - sahf - jp fremloop ; Continue while the FPU status bit C2 is set - fxch ; swap, so divisor is on top and result is in st(1) - fstp ST(0) ; Pop the divisor from the FP stack - retn 8 ; Return value is in st(0) -@JIT_FltRem@8 endp - -; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor) -@JIT_DblRem@16 proc public - fld qword ptr [esp+4] ; divisor - fld qword ptr [esp+12] ; dividend -fremloopd: - fprem - fstsw ax - fwait - sahf - jp fremloopd ; Continue while the FPU status bit C2 is set - fxch ; swap, so divisor is on top and result is in st(1) - fstp ST(0) ; Pop the divisor from the FP stack - retn 16 ; Return value is in st(0) -@JIT_DblRem@16 endp - ;------------------------------------------------------------------------------ ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. 
diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 08360e9ff0c06..9ad2fdf112d95 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -96,25 +96,6 @@ extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj) #endif // _DEBUG -#ifndef TARGET_UNIX - -HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val) -{ - FCALL_CONTRACT; - - INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val); - - if (ret != (INT32) ret) - goto THROW; - - return (INT32) ret; - -THROW: - FCThrow(kOverflowException); -} -HCIMPLEND -#endif // TARGET_UNIX - FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_); @@ -961,32 +942,6 @@ void InitJITHelpers1() JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseThreadAllocationContexts() ? JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL; - // Get CPU features and check for SSE2 support. - // This code should eventually probably be moved into codeman.cpp, - // where we set the cpu feature flags for the JIT based on CPU type and features. - int cpuFeatures[4]; - __cpuid(cpuFeatures, 1); - - DWORD dwCPUFeaturesECX = cpuFeatures[2]; - DWORD dwCPUFeaturesEDX = cpuFeatures[3]; - - // If bit 26 (SSE2) is set, then we can use the SSE2 flavors - // and faster x87 implementation for the P4 of Dbl2Lng. 
- if (dwCPUFeaturesEDX & (1<<26)) - { - SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2); - if (dwCPUFeaturesECX & 1) // check SSE3 - { - SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3); - SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3); - } - else - { - SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87); // SSE2 only for signed - SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87); - } - } - if (!(TrackAllocationsEnabled() || LoggingOn(LF_GCALLOC, LL_INFO10) #ifdef _DEBUG diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 1da0211496029..092dc342547bb 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -91,6 +91,32 @@ #include +#if !defined(HOST_64BIT) && !defined(TARGET_X86) +/*********************************************************************/ +HCIMPL2_VV(UINT64, JIT_LLsh, UINT64 num, int shift) +{ + FCALL_CONTRACT; + return num << (shift & 0x3F); +} +HCIMPLEND + +/*********************************************************************/ +HCIMPL2_VV(INT64, JIT_LRsh, INT64 num, int shift) +{ + FCALL_CONTRACT; + return num >> (shift & 0x3F); +} +HCIMPLEND + +/*********************************************************************/ +HCIMPL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift) +{ + FCALL_CONTRACT; + return num >> (shift & 0x3F); +} +HCIMPLEND +#endif // !HOST_64BIT && !TARGET_X86 + // // helper macro to multiply two 32-bit uints // @@ -137,117 +163,6 @@ HCIMPL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2) HCIMPLEND #endif // !TARGET_X86 || TARGET_UNIX -/*********************************************************************/ -HCIMPL2_VV(INT64, JIT_LMulOvf, INT64 val1, INT64 val2) -{ - FCALL_CONTRACT; - - // This short-cut does not actually help since the multiplication - // of two 32-bit signed ints compiles into the call to a slow helper - // if (Is32BitSigned(val1) && Is32BitSigned(val2)) - // return (INT64)(INT32)val1 * (INT64)(INT32)val2; - - 
INDEBUG(INT64 expected = val1 * val2;) - INT64 ret; - - // Remember the sign of the result - INT32 sign = Hi32Bits(val1) ^ Hi32Bits(val2); - - // Convert to unsigned multiplication - if (val1 < 0) val1 = -val1; - if (val2 < 0) val2 = -val2; - - // Get the upper 32 bits of the numbers - UINT32 val1High = Hi32Bits(val1); - UINT32 val2High = Hi32Bits(val2); - - UINT64 valMid; - - if (val1High == 0) { - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val2High, val1); - } - else { - if (val2High != 0) - goto ThrowExcep; - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val1High, val2); - } - - // See if any bits after bit 32 are set - if (Hi32Bits(valMid) != 0) - goto ThrowExcep; - - ret = Mul32x32To64(val1, val2) + ShiftToHi32Bits((UINT32)(valMid)); - - // check for overflow - if (Hi32Bits(ret) < (UINT32)valMid) - goto ThrowExcep; - - if (sign >= 0) { - // have we spilled into the sign bit? - if (ret < 0) - goto ThrowExcep; - } - else { - ret = -ret; - // have we spilled into the sign bit? 
- if (ret > 0) - goto ThrowExcep; - } - _ASSERTE(ret == expected); - return ret; - -ThrowExcep: - FCThrow(kOverflowException); -} -HCIMPLEND - -/*********************************************************************/ -HCIMPL2_VV(UINT64, JIT_ULMulOvf, UINT64 val1, UINT64 val2) -{ - FCALL_CONTRACT; - - INDEBUG(UINT64 expected = val1 * val2;) - UINT64 ret; - - // Get the upper 32 bits of the numbers - UINT32 val1High = Hi32Bits(val1); - UINT32 val2High = Hi32Bits(val2); - - UINT64 valMid; - - if (val1High == 0) { - if (val2High == 0) - return Mul32x32To64(val1, val2); - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val2High, val1); - } - else { - if (val2High != 0) - goto ThrowExcep; - // Compute the 'middle' bits of the long multiplication - valMid = Mul32x32To64(val1High, val2); - } - - // See if any bits after bit 32 are set - if (Hi32Bits(valMid) != 0) - goto ThrowExcep; - - ret = Mul32x32To64(val1, val2) + ShiftToHi32Bits((UINT32)(valMid)); - - // check for overflow - if (Hi32Bits(ret) < (UINT32)valMid) - goto ThrowExcep; - - _ASSERTE(ret == expected); - return ret; - -ThrowExcep: - FCThrow(kOverflowException); - } -HCIMPLEND - /*********************************************************************/ HCIMPL2(INT32, JIT_Div, INT32 dividend, INT32 divisor) { @@ -450,32 +365,6 @@ HCIMPL2_VV(UINT64, JIT_ULMod, UINT64 dividend, UINT64 divisor) } HCIMPLEND -#if !defined(HOST_64BIT) && !defined(TARGET_X86) -/*********************************************************************/ -HCIMPL2_VV(UINT64, JIT_LLsh, UINT64 num, int shift) -{ - FCALL_CONTRACT; - return num << (shift & 0x3F); -} -HCIMPLEND - -/*********************************************************************/ -HCIMPL2_VV(INT64, JIT_LRsh, INT64 num, int shift) -{ - FCALL_CONTRACT; - return num >> (shift & 0x3F); -} -HCIMPLEND - -/*********************************************************************/ -HCIMPL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift) -{ - FCALL_CONTRACT; - return 
num >> (shift & 0x3F); -} -HCIMPLEND -#endif // !HOST_64BIT && !TARGET_X86 - #include @@ -488,282 +377,22 @@ HCIMPLEND #include /*********************************************************************/ -// -HCIMPL1_V(double, JIT_ULng2Dbl, UINT64 val) -{ - FCALL_CONTRACT; - - double conv = (double) ((INT64) val); - if (conv < 0) - conv += (4294967296.0 * 4294967296.0); // add 2^64 - _ASSERTE(conv >= 0); - return(conv); -} -HCIMPLEND - -/*********************************************************************/ -// needed for ARM and RyuJIT-x86 +// needed for ARM and x86 HCIMPL1_V(double, JIT_Lng2Dbl, INT64 val) { FCALL_CONTRACT; - return double(val); + return (double)val; } HCIMPLEND -//-------------------------------------------------------------------------- -template -ftype modftype(ftype value, ftype *iptr); -template <> float modftype(float value, float *iptr) { return modff(value, iptr); } -template <> double modftype(double value, double *iptr) { return modf(value, iptr); } - -// round to nearest, round to even if tied -template -ftype BankersRound(ftype value) -{ - if (value < 0.0) return -BankersRound (-value); - - ftype integerPart; - modftype( value, &integerPart ); - - // if decimal part is exactly .5 - if ((value -(integerPart +0.5)) == 0.0) - { - // round to even - if (fmod(ftype(integerPart), ftype(2.0)) == 0.0) - return integerPart; - - // Else return the nearest even integer - return (ftype)copysign(ceil(fabs(value+0.5)), - value); - } - - // Otherwise round to closest - return (ftype)copysign(floor(fabs(value)+0.5), - value); -} - - -/*********************************************************************/ -// round double to nearest int (as double) -HCIMPL1_V(double, JIT_DoubleRound, double val) -{ - FCALL_CONTRACT; - return BankersRound(val); -} -HCIMPLEND - -/*********************************************************************/ -// round float to nearest int (as float) -HCIMPL1_V(float, JIT_FloatRound, float val) -{ - FCALL_CONTRACT; - return 
BankersRound(val); -} -HCIMPLEND - -/*********************************************************************/ -// Call fast Dbl2Lng conversion - used by functions below -FORCEINLINE INT64 FastDbl2Lng(double val) -{ -#ifdef TARGET_X86 - FCALL_CONTRACT; - return HCCALL1_V(JIT_Dbl2Lng, val); -#else - FCALL_CONTRACT; - return((__int64) val); -#endif -} - -/*********************************************************************/ -HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val) -{ - FCALL_CONTRACT; - - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < 4294967296.0) - return((UINT32)FastDbl2Lng(val)); - - FCThrow(kOverflowException); -} -HCIMPLEND - -/*********************************************************************/ -HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) -{ - FCALL_CONTRACT; - - const double two63 = 2147483648.0 * 4294967296.0; - UINT64 ret; - if (val < two63) { - ret = FastDbl2Lng(val); - } - else { - // subtract 0x8000000000000000, do the convert then add it back again - ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); - } - return ret; -} -HCIMPLEND - -/*********************************************************************/ -HCIMPL1_V(UINT64, JIT_Dbl2ULngOvf, double val) -{ - FCALL_CONTRACT; - - const double two64 = 4294967296.0 * 4294967296.0; - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < two64) { - const double two63 = 2147483648.0 * 4294967296.0; - UINT64 ret; - if (val < two63) { - ret = FastDbl2Lng(val); - } - else { - // subtract 0x8000000000000000, do the convert then add it back again - ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); - } -#ifdef _DEBUG - // since no overflow can occur, the value always has to be within 1 - double roundTripVal = HCCALL1_V(JIT_ULng2Dbl, ret); - _ASSERTE(val - 1.0 <= roundTripVal && roundTripVal <= val + 1.0); -#endif // _DEBUG - return ret; - } - - FCThrow(kOverflowException); -} -HCIMPLEND - - -#if 
!defined(TARGET_X86) || defined(TARGET_UNIX) - HCIMPL1_V(INT64, JIT_Dbl2Lng, double val) { FCALL_CONTRACT; - return((INT64)val); + return (INT64)val; } HCIMPLEND -HCIMPL1_V(int, JIT_Dbl2IntOvf, double val) -{ - FCALL_CONTRACT; - - const double two31 = 2147483648.0; - - // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return((INT32)val); - - FCThrow(kOverflowException); -} -HCIMPLEND - -HCIMPL1_V(INT64, JIT_Dbl2LngOvf, double val) -{ - FCALL_CONTRACT; - - const double two63 = 2147483648.0 * 4294967296.0; - - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. - if (val > -two63 - 0x402 && val < two63) - return((INT64)val); - - FCThrow(kOverflowException); -} -HCIMPLEND - -#ifndef TARGET_WINDOWS -namespace -{ - bool isnan(float val) - { - UINT32 bits = *reinterpret_cast(&val); - return (bits & 0x7FFFFFFFU) > 0x7F800000U; - } - bool isnan(double val) - { - UINT64 bits = *reinterpret_cast(&val); - return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL; - } - bool isfinite(float val) - { - UINT32 bits = *reinterpret_cast(&val); - return (~bits & 0x7F800000U) != 0; - } - bool isfinite(double val) - { - UINT64 bits = *reinterpret_cast(&val); - return (~bits & 0x7FF0000000000000ULL) != 0; - } -} -#endif - -HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor) -{ - FCALL_CONTRACT; - - // - // From the ECMA standard: - // - // If [divisor] is zero or [dividend] is infinity - // the result is NaN. - // If [divisor] is infinity, - // the result is [dividend] (negated for -infinity***). - // - // ***"negated for -infinity" has been removed from the spec - // - - if (divisor==0 || !isfinite(dividend)) - { - UINT32 NaN = CLR_NAN_32; - return *(float *)(&NaN); - } - else if (!isfinite(divisor) && !isnan(divisor)) - { - return dividend; - } - // else... 
-#if 0 - // COMPILER BUG WITH FMODF() + /Oi, USE FMOD() INSTEAD - return fmodf(dividend,divisor); -#else - return (float)fmod((double)dividend,(double)divisor); -#endif -} -HCIMPLEND - -HCIMPL2_VV(double, JIT_DblRem, double dividend, double divisor) -{ - FCALL_CONTRACT; - - // - // From the ECMA standard: - // - // If [divisor] is zero or [dividend] is infinity - // the result is NaN. - // If [divisor] is infinity, - // the result is [dividend] (negated for -infinity***). - // - // ***"negated for -infinity" has been removed from the spec - // - if (divisor==0 || !isfinite(dividend)) - { - UINT64 NaN = CLR_NAN_64; - return *(double *)(&NaN); - } - else if (!isfinite(divisor) && !isnan(divisor)) - { - return dividend; - } - // else... - return(fmod(dividend,divisor)); -} -HCIMPLEND - -#endif // !TARGET_X86 || TARGET_UNIX - #include diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 5e6b0cbeeafdd..dbf4aa43de507 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10692,7 +10692,17 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF || dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET || dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMZERO || - dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY) + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_LMUL_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULMUL_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULNG2DBL || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2LNG_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG_OVF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBLREM || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_FLTREM) { Precode* pPrecode = 
Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP); @@ -10722,12 +10732,15 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ { // Cache it for future uses to avoid taking the lock again. hlpFinalTierAddrTable[dynamicFtnNum] = finalTierAddr; + EE_TO_JIT_TRANSITION_LEAF(); return finalTierAddr; } } } *ppIndirection = ((FixupPrecode*)pPrecode)->GetTargetSlot(); + + EE_TO_JIT_TRANSITION_LEAF(); return NULL; } diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index bbca5c355fbb9..fe11b7c93e9ba 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -325,17 +325,6 @@ EXTERN_C FCDECL2(Object*, JIT_NewArr1OBJ_MP_InlineGetThread, CORINFO_CLASS_HANDL EXTERN_C FCDECL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2Lng, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2IntSSE2, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngP4x87, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngSSE3, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngOvf, double val); - -EXTERN_C FCDECL1_V(INT32, JIT_Dbl2IntOvf, double val); - -EXTERN_C FCDECL2_VV(float, JIT_FltRem, float dividend, float divisor); -EXTERN_C FCDECL2_VV(double, JIT_DblRem, double dividend, double divisor); - #ifndef HOST_64BIT #ifdef TARGET_X86 // JIThelp.asm diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index 60a638198f8f6..34211170f9c3d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1491,5 +1491,251 @@ public static double ScaleB(double x, int n) double u = BitConverter.Int64BitsToDouble(((long)(0x3ff + n) << 52)); return y * u; } + + [StackTraceHidden] + private static long LongMultiplyOverflow(long i, long j) + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + 
static uint Hi32Bits(ulong a) + { + return (uint)(a >> 32); + } + +#if DEBUG + long result = i * j; +#endif + + // Remember the sign of the result + int sign = (int)(Hi32Bits((ulong)i) ^ Hi32Bits((ulong)j)); + + // Convert to unsigned multiplication + if (i < 0) i = -i; + if (j < 0) j = -j; + + // Get the upper 32 bits of the numbers + uint val1High = Hi32Bits((ulong)i); + uint val2High = Hi32Bits((ulong)j); + + ulong valMid; + + if (val1High == 0) + { + // Compute the 'middle' bits of the long multiplication + valMid = BigMul(val2High, (uint)i); + } + else + { + if (val2High != 0) + goto Overflow; + // Compute the 'middle' bits of the long multiplication + valMid = BigMul(val1High, (uint)j); + } + + // See if any bits after bit 32 are set + if (Hi32Bits(valMid) != 0) + goto Overflow; + + long ret = (long)(BigMul((uint)i, (uint)j) + (valMid << 32)); + + // check for overflow + if (Hi32Bits((ulong)ret) < (uint)valMid) + goto Overflow; + + if (sign >= 0) + { + // have we spilled into the sign bit? + if (ret < 0) + goto Overflow; + } + else + { + ret = -ret; + // have we spilled into the sign bit? 
+ if (ret > 0) + goto Overflow; + } + +#if DEBUG + Debug.Assert(ret == result, $"Multiply overflow got: {ret}, expected: {result}"); +#endif + return ret; + + Overflow: + ThrowHelper.ThrowOverflowException(); + return 0; + } + + [StackTraceHidden] + private static ulong ULongMultiplyOverflow(ulong i, ulong j) + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static uint Hi32Bits(ulong a) + { + return (uint)(a >> 32); + } + + // Get the upper 32 bits of the numbers + uint val1High = Hi32Bits(i); + uint val2High = Hi32Bits(j); + + ulong valMid; + + if (val1High == 0) + { + if (val2High == 0) + return BigMul((uint)i, (uint)j); + // Compute the 'middle' bits of the long multiplication + valMid = BigMul(val2High, (uint)i); + } + else + { + if (val2High != 0) + goto Overflow; + // Compute the 'middle' bits of the long multiplication + valMid = BigMul(val1High, (uint)j); + } + + // See if any bits after bit 32 are set + if (Hi32Bits(valMid) != 0) + goto Overflow; + + ulong ret = BigMul((uint)i, (uint)j) + (valMid << 32); + + // check for overflow + if (Hi32Bits(ret) < (uint)valMid) + goto Overflow; + + Debug.Assert(ret == i * j, $"Multiply overflow got: {ret}, expected: {i * j}"); + return ret; + + Overflow: + ThrowHelper.ThrowOverflowException(); + return 0; + } + + private static double ULongToDouble(ulong val) + { + double conv = (long)val; + if (conv < 0) + conv += 4294967296.0 * 4294967296.0; // add 2^64 + Debug.Assert(conv >= 0); + return conv; + } + + private static ulong DoubleToULong(double val) + { + const double two63 = 2147483648.0 * 4294967296.0; + ulong ret; + if (val < two63) + { + ret = (ulong)(long)val; + } + else + { + // subtract 0x8000000000000000, do the convert then add it back again + ret = (ulong)(long)(val - two63) + 0x8000000000000000UL; + } + return ret; + } + + [StackTraceHidden] + private static int DoubleToIntOverflow(double val) + { + const double two31 = 2147483648.0; + + // Note that this expression also works properly for val = 
NaN case + if (val is > -two31 - 1 and < two31) + { + int ret = (int)val; + // since no overflow can occur, the value always has to be within 1 + Debug.Assert(val - 1.0 <= ret); + Debug.Assert(ret <= val + 1.0); + return ret; + } + + ThrowHelper.ThrowOverflowException(); + return 0; + } + + [StackTraceHidden] + private static uint DoubleToUIntOverflow(double val) + { + // Note that this expression also works properly for val = NaN case + if (val is > -1.0 and < 4294967296.0) + { + uint ret = (uint)(long)val; + // since no overflow can occur, the value always has to be within 1 + Debug.Assert(val - 1.0 <= ret); + Debug.Assert(ret <= val + 1.0); + return ret; + } + + ThrowHelper.ThrowOverflowException(); + return 0; + } + + [StackTraceHidden] + private static long DoubleToLongOverflow(double val) + { + const double two63 = 2147483648.0 * 4294967296.0; + + // Note that this expression also works properly for val = NaN case + // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. + if (val is > -two63 - 0x402 and < two63) + { + long ret = (long)val; + // since no overflow can occur, the value always has to be within 1 + Debug.Assert(val - 1.0 <= ret); + Debug.Assert(ret <= val + 1.0); + return ret; + } + + ThrowHelper.ThrowOverflowException(); + return 0; + } + + [StackTraceHidden] + private static ulong DoubleToULongOverflow(double val) + { + const double two64 = 4294967296.0 * 4294967296.0; + // Note that this expression also works properly for val = NaN case + if (val is > -1.0 and < two64) + { + ulong ret = (ulong)val; + // since no overflow can occur, the value always has to be within 1 + Debug.Assert(val - 1.0 <= ret); + Debug.Assert(ret <= val + 1.0); + return ret; + } + + ThrowHelper.ThrowOverflowException(); + return 0; + } + + private static double DoubleReminder(double dividend, double divisor) + { + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. 
+ // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + if (divisor == 0 || !double.IsFinite(dividend)) + { + return double.NaN; + } + + if (!double.IsFinite(divisor) && !double.IsNaN(divisor)) + { + return dividend; + } + + return FMod(dividend, divisor); + } + + private static int DoubleToInt(double val) => (int)(long)val; + private static uint DoubleToUInt(double val) => (uint)(long)val; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/MathF.cs b/src/libraries/System.Private.CoreLib/src/System/MathF.cs index cc0795255d0c8..d58e059d26c26 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MathF.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MathF.cs @@ -511,5 +511,28 @@ public static float ScaleB(float x, int n) float u = BitConverter.Int32BitsToSingle(((int)(0x7f + n) << 23)); return y * u; } + + private static float FloatReminder(float dividend, float divisor) + { + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. + // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + if (divisor == 0 || !float.IsFinite(dividend)) + { + return float.NaN; + } + + if (!float.IsFinite(divisor) && !float.IsNaN(divisor)) + { + return dividend; + } + + return FMod(dividend, divisor); + } } } From fe179dffd349a45b4be34d200f4d56931cc11689 Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Thu, 29 Feb 2024 16:02:00 -0800 Subject: [PATCH 02/14] Adding new instructions. 
--- src/coreclr/jit/emit.h | 6 +++-- src/coreclr/jit/emitxarch.cpp | 41 ++++++++++++++++++++--------------- src/coreclr/jit/instr.cpp | 16 ++++++++++---- src/coreclr/jit/instrsxarch.h | 9 +++++--- 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e5540a290b4c8..9ce5d038c3c1b 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3999,7 +3999,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const case INS_comiss: case INS_cvtss2sd: case INS_cvtss2si: - case INS_cvttss2si: + case INS_cvttss2si32: + case INS_cvttss2si64: case INS_divss: case INS_extractps: case INS_insertps: @@ -4042,7 +4043,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const case INS_comisd: case INS_cvtsd2si: case INS_cvtsd2ss: - case INS_cvttsd2si: + case INS_cvttsd2si32: + case INS_cvttsd2si64: case INS_divsd: case INS_maxsd: case INS_minsd: diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1bafb6796d807..91ece88fac16f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1522,9 +1522,11 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const switch (ins) { case INS_cvtss2si: - case INS_cvttss2si: + case INS_cvttss2si32: + case INS_cvttss2si64: case INS_cvtsd2si: - case INS_cvttsd2si: + case INS_cvttsd2si32: + case INS_cvttsd2si64: case INS_movd: case INS_movnti: case INS_andn: @@ -1544,7 +1546,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const #endif // TARGET_AMD64 case INS_vcvtsd2usi: case INS_vcvtss2usi: - case INS_vcvttsd2usi: { if (attr == EA_8BYTE) { @@ -2723,8 +2724,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_blsmsk: case INS_blsr: case INS_bzhi: - case INS_cvttsd2si: - case INS_cvttss2si: + case INS_cvttsd2si32: + case INS_cvttsd2si64: + case INS_cvttss2si32: + case INS_cvttss2si64: case INS_cvtsd2si: case INS_cvtss2si: case INS_extractps: @@ -2748,7 +2751,8 @@ bool 
emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) #endif case INS_vcvtsd2usi: case INS_vcvtss2usi: - case INS_vcvttsd2usi: + case INS_vcvttsd2usi32: + case INS_vcvttsd2usi64: case INS_vcvttss2usi32: case INS_vcvttss2usi64: { @@ -11605,22 +11609,20 @@ void emitter::emitDispIns( break; } - case INS_cvttsd2si: + case INS_cvttsd2si32: + case INS_cvttsd2si64: case INS_cvtss2si: case INS_cvtsd2si: - case INS_cvttss2si: + case INS_cvttss2si32: + case INS_cvttss2si64: case INS_vcvtsd2usi: case INS_vcvtss2usi: - case INS_vcvttsd2usi: - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); - break; - } - + case INS_vcvttsd2usi32: + case INS_vcvttsd2usi64: case INS_vcvttss2usi32: case INS_vcvttss2usi64: { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE)); + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); break; } @@ -19050,7 +19052,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } - case INS_cvttsd2si: + case INS_cvttsd2si32: + case INS_cvttsd2si64: case INS_cvtsd2si: case INS_cvtsi2sd32: case INS_cvtsi2ss32: @@ -19059,7 +19062,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vcvtsd2usi: case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: - case INS_vcvttsd2usi: + case INS_vcvttsd2usi32: + case INS_vcvttsd2usi64: case INS_vcvttss2usi32: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; @@ -19071,7 +19075,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency += PERFSCORE_LATENCY_5C; break; - case INS_cvttss2si: + case INS_cvttss2si32: + case INS_cvttss2si64: case INS_cvtss2si: case INS_vcvtss2usi: result.insThroughput = PERFSCORE_THROUGHPUT_1C; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 3d307ddfe7d96..a68f72cf37a6c 100644 --- 
a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2378,13 +2378,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) switch (to) { case TYP_INT: - return INS_cvttss2si; + return INS_cvttss2si32; case TYP_LONG: - return INS_cvttss2si; + return INS_cvttss2si64; case TYP_FLOAT: return ins_Move_Extend(TYP_FLOAT, false); case TYP_DOUBLE: return INS_cvtss2sd; + case TYP_ULONG: + return INS_vcvttss2usi64; + case TYP_UINT: + return INS_vcvttss2usi32; default: unreached(); } @@ -2394,13 +2398,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) switch (to) { case TYP_INT: - return INS_cvttsd2si; + return INS_cvttsd2si32; case TYP_LONG: - return INS_cvttsd2si; + return INS_cvttsd2si64; case TYP_FLOAT: return INS_cvtsd2ss; case TYP_DOUBLE: return ins_Move_Extend(TYP_DOUBLE, false); + case TYP_ULONG: + return INS_vcvttsd2usi64; + case TYP_UINT: + return INS_vcvttsd2usi32; default: unreached(); } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 17443cb978492..8cc89d61cda79 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -201,7 +201,8 @@ INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, INST3(cvtsi2ss32, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single INST3(cvtsi2ss64, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar single INST3(cvtss2si, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD -INST3(cvttss2si, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // 
cvt with trunc scalar single to DWORD +INST3(cvttss2si32, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD +INST3(cvttss2si64, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to QWORD INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Divide packed singles INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Maximum packed singles @@ -260,7 +261,8 @@ INST3(cvtsi2sd64, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed doubles to DWORDs INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed singles to DWORDs -INST3(cvttsd2si, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs +INST3(cvttsd2si32, 
"cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs +INST3(cvttsd2si64, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed QWORD INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Divide packed doubles INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) @@ -640,7 +642,8 @@ INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_ INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs -INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD +INST3(vcvttsd2usi32, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD +INST3(vcvttsd2usi64, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), 
INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles From 7885cdcb170ffba5c304d2632284e3ae509351d5 Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Thu, 29 Feb 2024 16:10:03 -0800 Subject: [PATCH 03/14] Lowering intrinsics --- src/coreclr/jit/assertionprop.cpp | 1 + src/coreclr/jit/gentree.h | 12 ++ src/coreclr/jit/hwintrinsiclistxarch.h | 20 +- src/coreclr/jit/hwintrinsicxarch.cpp | 288 +++++++++++++++++++++---- src/coreclr/jit/lowerxarch.cpp | 4 +- 5 files changed, 279 insertions(+), 46 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 51a0d5f271e05..010f8715c2774 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -83,6 +83,7 @@ bool IntegralRange::Contains(int64_t value) const { case TYP_UBYTE: case TYP_USHORT: + case TYP_UINT: return SymbolicIntegerValue::Zero; case TYP_BYTE: return SymbolicIntegerValue::ByteMin; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 328860eb1713f..000c134c346e7 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -556,6 +556,8 @@ enum GenTreeFlags : unsigned int #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS + + 
GTF_CONVERSION_SATURATED = 0x20000000, // GT_CAST -- conversion operation has saturation behavior }; inline constexpr GenTreeFlags operator ~(GenTreeFlags a) @@ -3886,6 +3888,16 @@ struct GenTreeCast : public GenTreeOp return false; } + + bool IsSaturatedConversion() + { + return (gtFlags & GTF_CONVERSION_SATURATED) != 0; + } + + void SetSaturatedConversion() + { + gtFlags |= GTF_CONVERSION_SATURATED; + } }; // GT_BOX nodes are place markers for boxed values. The "real" tree diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 3093c9ff71a56..660186cefa413 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -273,8 +273,12 @@ HARDWARE_INTRINSIC(Vector512, Create, HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, true, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, Divide, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, EqualsAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -411,7 +415,7 @@ HARDWARE_INTRINSIC(SSE, CompareUnordered, HARDWARE_INTRINSIC(SSE, CompareScalarUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) HARDWARE_INTRINSIC(SSE, DivideScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, 
LoadAlignedVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) @@ -460,7 +464,7 @@ HARDWARE_INTRINSIC(SSE, Xor, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE 64-bit-only Intrinsics HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(SSE_X64, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -511,7 +515,7 @@ HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) @@ -578,7 +582,7 @@ HARDWARE_INTRINSIC(SSE2, Xor, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE2 64-bit-only Intrinsics HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Double, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) @@ -708,9 +712,9 @@ HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -861,7 +865,7 @@ HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, 
INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToInt32, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, false, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) @@ -1027,7 +1031,7 @@ HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F_X64, ConvertToInt64, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index f88cf6ec99ec3..7f6469f992916 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1415,15 +1415,190 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToDouble: case NI_Vector256_ConvertToDouble: + case NI_Vector512_ConvertToDouble: + { + assert(sig->numArgs == 1); + assert(varTypeIsLong(simdBaseType) || simdBaseType == TYP_FLOAT); + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + if (varTypeIsLong(simdBaseType)) + { + intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Double + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Double + : NI_AVX512DQ_ConvertToVector512Double; + } + else + { + intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Double + : (simdSize == 32) ? 
NI_AVX_ConvertToVector256Double + : NI_AVX512F_ConvertToVector512Double; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + } + break; + } + case NI_Vector128_ConvertToInt64: case NI_Vector256_ConvertToInt64: + case NI_Vector512_ConvertToInt64: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + op1 = impSIMDPopStack(); + + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: Saturate to Zero + // SNAN: 0b1000: Saturate to Zero + // ZERO: 0b0000 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x00000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + GenTree* max_val = + gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast<double>(INT64_MAX)), + simdBaseJitType, simdSize); + GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG), + CORINFO_TYPE_LONG, simdSize); + // we will be using the input value twice + GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); + + // usage 1 --> compare with max value of integer + saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); + // cast it + + intrinsic = (simdSize == 16) ? 
NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + + retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); + + // usage 2 --> use thecompared mask with input value and max value to blend + retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, simdSize); + } +#endif // TARGET_AMD64 + break; + } + case NI_Vector128_ConvertToUInt32: case NI_Vector256_ConvertToUInt32: + case NI_Vector512_ConvertToUInt32: + { + assert(sig->numArgs == 1); + assert(varTypeIsFloating(simdBaseType)); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + op1 = impSIMDPopStack(); + + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: + // SNAN: 0b1000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b1000: Saturate to Zero + // +VAL: 0b0000 + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x08000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation + : (simdSize == 32) ? 
NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + + retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + } +#endif // TARGET_AMD64 + break; + } + case NI_Vector128_ConvertToUInt64: case NI_Vector256_ConvertToUInt64: + case NI_Vector512_ConvertToUInt64: { assert(sig->numArgs == 1); - // TODO-XARCH-CQ: These intrinsics should be accelerated + assert(simdBaseType == TYP_DOUBLE); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + op1 = impSIMDPopStack(); + + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: + // SNAN: 0b1000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b1000: Saturate to Zero + // +VAL: 0b0000 + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x08000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + + retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + } +#endif // TARGET_AMD64 break; } @@ -1433,24 +1608,63 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_FLOAT); - - switch (simdSize) +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) { - case 16: - intrinsic = NI_SSE2_ConvertToVector128Int32WithTruncation; - break; - case 32: - intrinsic = NI_AVX_ConvertToVector256Int32WithTruncation; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Int32WithTruncation; - break; - default: - unreached(); - } + op1 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. 
+ GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: Saturate to Zero + // SNAN: 0b1000: Saturate to Zero + // ZERO: 0b0000 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x00000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + // GenTree* op2Clone; + // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, + // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); + + // run vfixupimmsd base on table and no flags reporting + GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT32_MAX)), + simdBaseJitType, simdSize); + GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT), + CORINFO_TYPE_INT, simdSize); + // we will be using the input value twice + GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); + + // usage 1 --> compare with max value of integer + saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); + // cast it + + intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation + : (simdSize == 32) ? 
NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; + + retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); + + // usage 2 --> use thecompared mask with input value and max value to blend + retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, simdSize); + } +#endif // TARGET_AMD64 break; } @@ -1459,31 +1673,33 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_ConvertToSingle: { assert(sig->numArgs == 1); - + assert(varTypeIsInt(simdBaseType)); + intrinsic = NI_Illegal; if (simdBaseType == TYP_INT) { - switch (simdSize) + if (simdSize == 16) { - case 16: - intrinsic = NI_SSE2_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); + intrinsic = NI_SSE2_ConvertToVector128Single; + } + else if (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + intrinsic = NI_AVX_ConvertToVector256Single; + } + else if (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically()) + { + intrinsic = NI_AVX512F_ConvertToVector512Single; } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); } - else + else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) + { + intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single + : (simdSize == 32) ? 
NI_AVX512F_VL_ConvertToVector256Single + : NI_AVX512F_ConvertToVector512Single; + } + if (intrinsic != NI_Illegal) { - // TODO-XARCH-CQ: These intrinsics should be accelerated - assert(simdBaseType == TYP_UINT); + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); } break; } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 811657c9a5219..ed1eccc2144af 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -843,7 +843,7 @@ void Lowering::LowerCast(GenTree* tree) if (varTypeIsFloating(srcType)) { noway_assert(!tree->gtOverflow()); - noway_assert(castToType != TYP_ULONG); + assert(castToType != TYP_ULONG || comp->IsBaselineVector512IsaSupportedDebugOnly()); } else if (srcType == TYP_UINT) { @@ -851,7 +851,7 @@ void Lowering::LowerCast(GenTree* tree) } else if (srcType == TYP_ULONG) { - assert(castToType != TYP_FLOAT || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(castToType != TYP_FLOAT || comp->IsBaselineVector512IsaSupportedDebugOnly()); } // Case of src is a small type and dst is a floating point type. 
From 064fdb73176b61dfe2b36724aa916245de33ae9c Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Thu, 29 Feb 2024 18:29:44 -0800 Subject: [PATCH 04/14] Handling behaviour + fixing R2R etc # Conflicts: # src/coreclr/inc/jithelpers.h # src/coreclr/jit/morph.cpp # src/coreclr/jit/utils.cpp # src/coreclr/jit/valuenum.cpp # src/coreclr/nativeaot/Runtime/MathHelpers.cpp # src/coreclr/vm/jithelpers.cpp --- docs/design/coreclr/botr/readytorun-format.md | 1 + src/coreclr/inc/corinfo.h | 1 + src/coreclr/inc/jithelpers.h | 1 + src/coreclr/inc/readytorun.h | 1 + src/coreclr/inc/readytorunhelpers.h | 1 + src/coreclr/jit/codegenxarch.cpp | 11 +- src/coreclr/jit/morph.cpp | 167 +++++++++- src/coreclr/jit/simdashwintrinsic.cpp | 313 ++++++++++++++++-- src/coreclr/jit/utils.cpp | 1 + src/coreclr/jit/valuenum.cpp | 6 + src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 47 +++ .../Internal/Runtime/ReadyToRunConstants.cs | 1 + .../Common/JitInterface/CorInfoHelpFunc.cs | 1 + .../ILCompiler.Compiler/Compiler/JitHelper.cs | 3 + .../JitInterface/CorInfoImpl.ReadyToRun.cs | 3 + .../ReadyToRunSignature.cs | 4 + .../JitInterface/CorInfoImpl.RyuJit.cs | 3 + 17 files changed, 517 insertions(+), 48 deletions(-) diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md index a9a5c8b916303..c222ab299e54b 100644 --- a/docs/design/coreclr/botr/readytorun-format.md +++ b/docs/design/coreclr/botr/readytorun-format.md @@ -870,6 +870,7 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_Flt2UInt = 0xD8, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index b4417e60af4d5..a935dd2e53f2a 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -393,6 +393,7 @@ enum CorInfoHelpFunc CORINFO_HELP_DBL2LNG_OVF, CORINFO_HELP_DBL2UINT, // unused 
CORINFO_HELP_DBL2UINT_OVF, + CORINFO_HELP_FLT2UINT, CORINFO_HELP_DBL2ULNG, CORINFO_HELP_DBL2ULNG_OVF, CORINFO_HELP_FLTREM, diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e0f65ff5de908..8cb878a14a210 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -59,6 +59,7 @@ DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL, CORINFO_HELP_SIG_8_STACK) + DYNAMICJITHELPER(CORINFO_HELP_FLT2UINT, NULL, CORINFO_HELP_SIG_8_STACK) //Deepak DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_FLTREM, NULL, CORINFO_HELP_SIG_8_STACK) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 41a4aa251fa74..0c84b7f3baef0 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -399,6 +399,7 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_Flt2UInt = 0xD8, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index bbb586e8eb4a3..695d9d886e261 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -84,6 +84,7 @@ HELPER(READYTORUN_HELPER_Dbl2Lng, CORINFO_HELP_DBL2LNG, HELPER(READYTORUN_HELPER_Dbl2LngOvf, CORINFO_HELP_DBL2LNG_OVF, ) HELPER(READYTORUN_HELPER_Dbl2UInt, CORINFO_HELP_DBL2UINT, ) HELPER(READYTORUN_HELPER_Dbl2UIntOvf, CORINFO_HELP_DBL2UINT_OVF, ) +HELPER(READYTORUN_HELPER_Flt2UInt, CORINFO_HELP_FLT2UINT, ) HELPER(READYTORUN_HELPER_Dbl2ULng, CORINFO_HELP_DBL2ULNG, ) HELPER(READYTORUN_HELPER_Dbl2ULngOvf, CORINFO_HELP_DBL2ULNG_OVF, ) diff --git a/src/coreclr/jit/codegenxarch.cpp 
b/src/coreclr/jit/codegenxarch.cpp index 223199f35c327..4370a7cf5fc4d 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7641,13 +7641,16 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase. - assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); + // into a helper call by either front-end or lowering phase, unless we have AVX512F + // accelerated conversions. + assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || + compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. // To achieve it we pretend as if we are converting it to a long. - if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT)))) + if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) && + !compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { dstType = TYP_LONG; } @@ -7655,7 +7658,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // Note that we need to specify dstType here so that it will determine // the size of destination integer register and also the rex.w prefix. 
genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType)); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); genProduceReg(treeNode); } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0e7edf4f15902..4b58a8b6899e8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -323,6 +323,139 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) } } } + + // This if check needs to be changed to make sure we only + // block casts which are already Fixed UP. + do + { + if (!tree->gtOverflow() && varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType)) + { + if ((dstType == TYP_LONG) && (srcType == TYP_FLOAT)) + { + oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); + srcType = TYP_DOUBLE; + } + if (tree->IsSaturatedConversion()) + { + break; + } + CorInfoType fieldType = (srcType == TYP_DOUBLE) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT; + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + { + if (varTypeIsUnsigned(dstType)) + { + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. 
+ GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16); + + // QNAN: 0b1000: + // SNAN: 0b1000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b1000: Saturate to Zero + // +VAL: 0b0000 + tbl->gtSimdVal.i32[0] = 0x08000088; + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&oper); + + // run vfixupimmsd base on table and no flags reporting + GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, oper, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_FixupScalar, fieldType, 16); + + // Convert to scalar + // Here, we try to insert a Vector128 to Scalar node so that the input + // can be provided to the scalar cast + GenTree* retNode1 = + gtNewSimdHWIntrinsicNode(srcType, retNode, NI_Vector128_ToScalar, fieldType, 16); + tree = gtNewCastNode(genActualType(dstType), retNode1, false, dstType); + tree->SetSaturatedConversion(); + return fgMorphTree(tree); + } + else + { + CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG; + + ssize_t actualMaxVal = (dstType == TYP_INT) ? INT32_MAX : INT64_MAX; + + // CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG; + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. 
+ GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16); + + // QNAN: 0b1000: + // SNAN: 0b1000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000: Saturate to Zero + // +VAL: 0b0000 + tbl->gtSimdVal.i32[0] = 0x00000088; + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&oper); + + // run vfixupimmsd base on table and no flags reporting + oper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, oper, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_FixupScalar, fieldType, 16); + + GenTree* saturate_val = oper; + + // get the max value vector + + GenTree* max_val = (srcType == TYP_DOUBLE) ? gtNewDconNodeD(static_cast<double>(actualMaxVal)) + : gtNewDconNodeF(static_cast<float>(actualMaxVal)); + GenTree* max_valDup = + (dstType == TYP_INT) ? gtNewIconNode(actualMaxVal, dstType) : gtNewLconNode(actualMaxVal); + max_val = gtNewSimdCreateBroadcastNode(TYP_SIMD16, max_val, fieldType, 16); + max_valDup = gtNewSimdCreateBroadcastNode(TYP_SIMD16, max_valDup, destFieldType, 16); + + // we will be using the input value twice + GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); + + // usage 1 --> compare with max value of integer + saturate_val = gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, saturate_val, max_val, fieldType, 16); + GenTree* retNode1 = + gtNewSimdHWIntrinsicNode(srcType, saturate_valDup, NI_Vector128_ToScalar, fieldType, 16); + // cast it + tree = gtNewCastNode(dstType, retNode1, false, dstType); + tree->SetSaturatedConversion(); + GenTree* tree1 = gtNewSimdCreateBroadcastNode(TYP_SIMD16, tree, destFieldType, 16); + + // usage 2 --> use the compared mask with input value and max value to blend + // GenTree* dummy = gtNewSimdCreateBroadcastNode(TYP_SIMD16, gtNewLconNode(2), destFieldType, 16); + saturate_val = gtNewSimdCndSelNode(TYP_SIMD16, saturate_val, max_valDup, tree1, 
destFieldType, 16); + saturate_val = + gtNewSimdHWIntrinsicNode(dstType, saturate_val, NI_Vector128_ToScalar, destFieldType, 16); + return fgMorphTree(saturate_val); + } + } + // does not work, need to convert into helper function + else if (srcType == TYP_FLOAT && dstType == TYP_UINT) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_FLT2UINT, oper); + } + else if (srcType == TYP_DOUBLE && dstType == TYP_UINT) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); + } + else if (srcType == TYP_DOUBLE && dstType == TYP_INT) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); + } + } + } while (false); + #endif // TARGET_AMD64 // See if the cast has to be done in two steps. R -> I @@ -336,7 +469,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #elif defined(TARGET_AMD64) // Amd64: src = float, dst = uint64 or overflow conversion. // This goes through helper and hence src needs to be converted to double. - && (tree->gtOverflow() || (dstType == TYP_ULONG)) + && (tree->gtOverflow() || ((dstType == TYP_INT || dstType == TYP_ULONG || dstType == TYP_LONG) && + !compOpportunisticallyDependsOn(InstructionSet_AVX512F))) #elif defined(TARGET_ARM) // Arm: src = float, dst = int64/uint64 or overflow conversion. 
&& (tree->gtOverflow() || varTypeIsLong(dstType)) @@ -371,26 +505,43 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) switch (dstType) { case TYP_INT: +#ifdef TARGET_XARCH + if (!tree->IsSaturatedConversion()) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); + } +#endif // TARGET_XARCH return nullptr; case TYP_UINT: -#if defined(TARGET_ARM) || defined(TARGET_AMD64) +#if defined(TARGET_ARM) return nullptr; #else // TARGET_X86 - oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); + if (tree->IsSaturatedConversion()) + { + return nullptr; + } + /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT); - return fgMorphTree(tree); + return fgMorphTree(tree);*/ + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); #endif // TARGET_X86 case TYP_LONG: -#ifdef TARGET_AMD64 - // SSE2 has instructions to convert a float/double directly to a long +#ifdef TARGET_XARCH + if (!tree->IsSaturatedConversion()) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); + } return nullptr; -#else // !TARGET_AMD64 +#endif // TARGET_XARCH return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); -#endif // !TARGET_AMD64 case TYP_ULONG: +#ifdef TARGET_AMD64 + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + return nullptr; +#endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index f06b38736ddad..d1e157ab8dbef 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -513,23 +513,45 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { #if defined(TARGET_XARCH) + case NI_VectorT_ConvertToDouble: + { +#ifdef TARGET_AMD64 + if ((varTypeIsLong(simdBaseType) && IsBaselineVector512IsaSupportedOpportunistically()) || + (simdBaseType == TYP_FLOAT && 
((simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX)) || + (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically())))) + { + break; + } +#endif // TARGET_AMD64 + return nullptr; + } + + case NI_VectorT_ConvertToInt32: case NI_VectorT_ConvertToInt64: case NI_VectorT_ConvertToUInt32: case NI_VectorT_ConvertToUInt64: { - // TODO-XARCH-CQ: These intrinsics should be accelerated +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + break; + } +#endif // TARGET_AMD64 return nullptr; } case NI_VectorT_ConvertToSingle: { - if (simdBaseType == TYP_UINT) +#ifdef TARGET_AMD64 + if ((simdBaseType == TYP_INT && ((simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX)) || + (simdSize == 64 && IsBaselineVector512IsaSupportedOpportunistically()))) || + (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())) { - // TODO-XARCH-CQ: These intrinsics should be accelerated - return nullptr; + break; } - break; +#endif // TARGET_AMD64 + return nullptr; } #endif // TARGET_XARCH @@ -1154,50 +1176,269 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #if defined(TARGET_XARCH) + + case NI_VectorT_ConvertToInt64: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. 
+ GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: Saturate to Zero + // SNAN: 0b1000: Saturate to Zero + // ZERO: 0b0000 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x00000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + // GenTree* op2Clone; + // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, + // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); + + // run vfixupimmsd base on table and no flags reporting + GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + GenTree* max_val = + gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast(INT64_MAX)), + simdBaseJitType, simdSize); + GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG), + CORINFO_TYPE_LONG, simdSize); + // we will be using the input value twice + GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); + + // usage 1 --> compare with max value of integer + saturate_val = + gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); + // cast it + + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + + GenTree* retNode = + gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); + + // usage 2 --> use thecompared mask with input value and max value to blend + return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, + simdSize); + } +#endif // TARGET_AMD64 + return nullptr; + } + + case NI_VectorT_ConvertToUInt32: + { + assert(sig->numArgs == 1); + assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT)); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b0000: + // SNAN: 0b0000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b1000: Saturate to Zero + // +VAL: 0b0000 + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x08000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + intrinsic = (simdSize == 16) + ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation + : (simdSize == 32) ? 
NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + + return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + } +#endif // TARGET_AMD64 + return nullptr; + } + + case NI_VectorT_ConvertToUInt64: + { + assert(sig->numArgs == 1); + assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT)); +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) + { + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b0000: + // SNAN: 0b0000 + // ZERO: 0b0000: + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b1000: Saturate to Zero + // +VAL: 0b0000 + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x08000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + intrinsic = (simdSize == 16) + ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + + return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + } +#endif // TARGET_AMD64 + return nullptr; + } + case NI_VectorT_ConvertToInt32: { assert(simdBaseType == TYP_FLOAT); - NamedIntrinsic convert; - - switch (simdSize) +#ifdef TARGET_AMD64 + if (IsBaselineVector512IsaSupportedOpportunistically()) { - case 16: - convert = NI_SSE2_ConvertToVector128Int32WithTruncation; - break; - case 32: - convert = NI_AVX_ConvertToVector256Int32WithTruncation; - break; - case 64: - convert = NI_AVX512F_ConvertToVector512Int32WithTruncation; - break; - default: - unreached(); + var_types simdType = getSIMDTypeForSize(simdSize); + // Generate the control table for VFIXUPIMMSD + // The behavior we want is to saturate negative values to 0. + GenTreeVecCon* tbl = gtNewVconNode(simdType); + + // QNAN: 0b1000: Saturate to Zero + // SNAN: 0b1000: Saturate to Zero + // ZERO: 0b0000 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x00000088; + } + + // Generate first operand + // The logic is that first and second operand are basically the same because we want + // the output to be in the same xmm register + // Hence we clone the first operand + GenTree* op2Clone = fgMakeMultiUse(&op1); + // GenTree* op2Clone; + // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, + // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); + + // run vfixupimmsd base on table and no flags reporting + GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdBaseJitType, simdSize); + + GenTree* max_val = + gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast(INT32_MAX)), + simdBaseJitType, simdSize); + GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, 
gtNewIconNode(INT32_MAX, TYP_INT), + CORINFO_TYPE_INT, simdSize); + // we will be using the input value twice + GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); + + // usage 1 --> compare with max value of integer + saturate_val = + gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); + // cast it + + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation + : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; + + GenTree* retNode = + gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); + + // usage 2 --> use thecompared mask with input value and max value to blend + return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, + simdSize); } +#endif // TARGET_AMD64 + return nullptr; + } - return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize); + case NI_VectorT_ConvertToDouble: + { + assert(sig->numArgs == 1); + assert(varTypeIsLong(simdBaseType) || simdBaseType == TYP_FLOAT); + if (varTypeIsLong(simdBaseType)) + { + intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Double + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Double + : NI_AVX512DQ_ConvertToVector512Double; + } + else + { + intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Double + : (simdSize == 32) ? 
NI_AVX_ConvertToVector256Double + : NI_AVX512F_ConvertToVector512Double; + } + return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); } case NI_VectorT_ConvertToSingle: { - assert(simdBaseType == TYP_INT); - NamedIntrinsic convert; + assert(varTypeIsInt(simdBaseType)); + NamedIntrinsic intrinsic = NI_Illegal; - switch (simdSize) + if (simdBaseType == TYP_INT) { - case 16: - convert = NI_SSE2_ConvertToVector128Single; - break; - case 32: - convert = NI_AVX_ConvertToVector256Single; - break; - case 64: - convert = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); + intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Single + : (simdSize == 32) ? NI_AVX_ConvertToVector256Single + : NI_AVX512F_ConvertToVector512Single; } - - return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize); + else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) + { + intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single + : (simdSize == 32) ? 
NI_AVX512F_VL_ConvertToVector256Single + : NI_AVX512F_ConvertToVector512Single; + } + return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) case NI_VectorT_ConvertToDouble: diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 5b9e74e91b242..945a78333e8ef 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1537,6 +1537,7 @@ void HelperCallProperties::init() case CORINFO_HELP_LNG2DBL: case CORINFO_HELP_ULNG2DBL: case CORINFO_HELP_DBL2LNG: + case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_FLTREM: case CORINFO_HELP_DBLREM: diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 4a8ca85aa3e58..fa2dbfc80168d 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12776,6 +12776,11 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) hasOverflowCheck = true; break; + case CORINFO_HELP_FLT2UINT: + castToType = TYP_UINT; + castFromType = TYP_FLOAT; + break; + case CORINFO_HELP_DBL2UINT_OVF: castToType = TYP_UINT; castFromType = TYP_DOUBLE; @@ -13084,6 +13089,7 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) case CORINFO_HELP_DBL2INT_OVF: case CORINFO_HELP_DBL2LNG: case CORINFO_HELP_DBL2LNG_OVF: + case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2UINT_OVF: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_DBL2ULNG_OVF: diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 6491813e3ed4e..cd25f6731b2b0 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -5,6 +5,53 @@ #include "CommonMacros.h" #include "rhassert.h" +// +// Floating point and 64-bit integer math helpers. 
+// + + +EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + const double int64_min = (double)INT64_MIN; + const double int64_max = (double)INT64_MAX; + return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val; +#else + return (int64_t)val; +#endif //HOST_X86 || HOST_AMD64 +} + +EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val) +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + const double int32_min = (double)INT32_MIN - 1.0; + const double int32_max = -2.0 * (double)INT32_MIN; + return (val!= val) ? 0 : (val <= int32_min) ? INT32_MIN : (val >= int32_max) ? INT32_MAX : (int32_t)val; +#else + return (int32_t)val; +#endif //HOST_X86 || HOST_AMD64 +} + +EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val) +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + const double uint32_max_plus_1 = -2.0 * (double)INT32_MIN; + return (val < 0) ? 0 : (val != val || val >= uint32_max_plus_1) ? UINT32_MAX : (uint32_t)val; +#else + return (uint32_t)val; +#endif //HOST_X86 || HOST_AMD64 +} + +EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpFlt2UInt(float val) +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + const float uint32_max_plus_1 = -2.0 * (float)INT32_MIN; + return (val != val || val < 0) ? 0 : (val >= uint32_max_plus_1) ? 
UINT32_MAX : (uint32_t)val; +#else + return (uint32_t)val; +#endif //HOST_X86 || HOST_AMD64 +} + #undef min #undef max #include diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index a37945534865b..4e8a5bba4d5a8 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -311,6 +311,7 @@ public enum ReadyToRunHelper Dbl2UIntOvf = 0xD5, Dbl2ULng = 0xD6, Dbl2ULngOvf = 0xD7, + Flt2UInt = 0xD8, // Floating point ops DblRem = 0xE0, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 5346806c1aff6..547ac19096cca 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -36,6 +36,7 @@ public enum CorInfoHelpFunc CORINFO_HELP_DBL2LNG_OVF, CORINFO_HELP_DBL2UINT, CORINFO_HELP_DBL2UINT_OVF, + CORINFO_HELP_FLT2UINT, CORINFO_HELP_DBL2ULNG, CORINFO_HELP_DBL2ULNG_OVF, CORINFO_HELP_FLTREM, diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 8d5b1357d9e3e..ce321a467b309 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -177,6 +177,9 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.Dbl2ULng: methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong"); break; + case ReadyToRunHelper.Flt2UInt: + mangledName = "RhpFlt2UInt"; + break; case ReadyToRunHelper.Dbl2IntOvf: methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow"); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs 
b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index ad83b1eb42a5d..0524c68181cae 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1150,6 +1150,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: id = ReadyToRunHelper.Dbl2UInt; break; + case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT: + id = ReadyToRunHelper.Flt2UInt; + break; case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: id = ReadyToRunHelper.Dbl2UIntOvf; break; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 0eae2f10cb8f0..022d17c662257 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1909,6 +1909,10 @@ private void ParseHelper(StringBuilder builder) builder.Append("DBL2UINT"); break; + case ReadyToRunHelper.Flt2UInt: + builder.Append("FLT2UINT"); + break; + case ReadyToRunHelper.Dbl2UIntOvf: builder.Append("DBL2UINTOVF"); break; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 1d2f977c4c167..4484b90c0c035 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -678,6 +678,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: id = ReadyToRunHelper.Dbl2UInt; break; + case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT: + id = ReadyToRunHelper.Flt2UInt; + break; case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: id = ReadyToRunHelper.Dbl2UIntOvf; 
break; From 12cc04f589411853e157e079b3e5fceb41bbb8df Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Thu, 29 Feb 2024 16:11:24 -0800 Subject: [PATCH 05/14] Fixing behaviour in tests. --- .../out_of_range_fp_to_int_conversions.cpp | 18 ++++----------- .../out_of_range_fp_to_int_conversions.cs | 22 ++++--------------- .../CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il | 3 +++ .../CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il | 3 +++ .../JitBlue/Runtime_62692/Runtime_62692.cs | 4 ++-- 5 files changed, 16 insertions(+), 34 deletions(-) diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index eaf7f2fa1a9da..de7f2c19577c1 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -30,12 +30,12 @@ extern "C" DLLEXPORT int32_t ConvertDoubleToInt32(double x, FPtoIntegerConversio switch (t) { case CONVERT_BACKWARD_COMPATIBLE: - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case CONVERT_SENTINEL: return ((x != x) || (x < INT32_MIN) || (x > INT32_MAX)) ? INT32_MIN : (int32_t)x; case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32: case CONVERT_SATURATING: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return (x != x) ? 0 : (x < INT32_MIN) ? INT32_MIN : (x > INT32_MAX) ? INT32_MAX : (int32_t)x; case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; @@ -53,7 +53,6 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers const double int64_max_plus_1 = 0x1.p63; // 0x43e0000000000000 // (uint64_t)INT64_MAX + 1; switch (t) { - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case CONVERT_BACKWARD_COMPATIBLE: return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? 
0 : (uint32_t)(int64_t)x; @@ -62,6 +61,7 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32: case CONVERT_SATURATING: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return ((x != x) || (x < 0)) ? 0 : (x > UINT32_MAX) ? UINT32_MAX : (uint32_t)x; case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; @@ -95,7 +95,6 @@ extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversio const double int32_max_plus1 = ((double)INT32_MAX) + 1; switch (t) { - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case CONVERT_BACKWARD_COMPATIBLE: case CONVERT_SENTINEL: return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x; @@ -111,6 +110,7 @@ extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversio } case CONVERT_SATURATING: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return (x != x) ? 0 : (x < INT64_MIN) ? INT64_MIN : (x >= int64_max_plus_1) ? INT64_MAX : (int64_t)x; case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; @@ -140,6 +140,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x; case CONVERT_SATURATING: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return ((x != x) || (x < 0)) ? 0 : (x >= uint64_max_plus_1) ? UINT64_MAX : (uint64_t)x; case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32: @@ -154,17 +155,6 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver } } - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - if (x < int64_max_plus_1) - { - return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x; - } - else - { - x -= int64_max_plus_1; - x = trunc(x); - return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? 
INT64_MIN : (int64_t)x) + (0x8000000000000000); - } case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 5b78783c09e4c..1f75c4dbaef32 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -87,11 +87,11 @@ public static int ConvertDoubleToInt32(double x, FPtoIntegerConversionType t) switch (t) { - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE: case FPtoIntegerConversionType.CONVERT_SENTINEL: return (Double.IsNaN(x) || (x int.MaxValue)) ? int.MinValue: (int) x; + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_SATURATING: case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32: return Double.IsNaN(x) ? 0 : (x< int.MinValue) ? int.MinValue : (x > int.MaxValue) ? int.MaxValue : (int) x; @@ -109,13 +109,13 @@ public static uint ConvertDoubleToUInt32(double x, FPtoIntegerConversionType t) switch (t) { - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE: return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? 0 : (uint)(long)x; case FPtoIntegerConversionType.CONVERT_SENTINEL: return (Double.IsNaN(x) || (x < 0) || (x > uint.MaxValue)) ? uint.MaxValue : (uint)x; + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_SATURATING: case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32: return (Double.IsNaN(x) || (x < 0)) ? 0 : (x > uint.MaxValue) ? 
uint.MaxValue : (uint)x; @@ -136,7 +136,6 @@ public static long ConvertDoubleToInt64(double x, FPtoIntegerConversionType t) switch (t) { - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE: case FPtoIntegerConversionType.CONVERT_SENTINEL: return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x; @@ -151,6 +150,7 @@ public static long ConvertDoubleToInt64(double x, FPtoIntegerConversionType t) return -(long)CppNativeArm32ConvertDoubleToUInt64(-x); } + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_SATURATING: return Double.IsNaN(x) ? 0 : (x < long.MinValue) ? long.MinValue : (x >= llong_max_plus_1) ? long.MaxValue : (long)x; } @@ -185,6 +185,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) case FPtoIntegerConversionType.CONVERT_SENTINEL: return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)x; + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: case FPtoIntegerConversionType.CONVERT_SATURATING: return (Double.IsNaN(x) || (x < 0)) ? 0 : (x >= ullong_max_plus_1) ? ulong.MaxValue : (ulong)x; @@ -199,21 +200,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - - if (x < two63) - { - return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x; - } - else - { - // (double)LLONG_MAX cannot be represented exactly as double - const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1); - x -= two63; - x = Math.Truncate(x); - return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? 
long.MinValue : (long)x) + (0x8000000000000000); - } } return 0; diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il index b8ccece0a1d6f..ff132dd868596 100644 --- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il +++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il @@ -48,6 +48,9 @@ End_Orphan_3: } catch [mscorlib]System.OverflowException { pop leave the_end +} catch [mscorlib]System.DivideByZeroException { + pop + leave the_end } the_end: ldc.i4 100 diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il index 65f3bc2af34f6..0422a59b02052 100644 --- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il +++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il @@ -684,6 +684,9 @@ leave END } catch [mscorlib]System.OverflowException { pop leave END +} catch [mscorlib]System.DivideByZeroException { + pop + leave END } END: ldc.i4 100 diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs index 5b85cbb0115a0..22fb1e0bde69c 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs @@ -39,8 +39,8 @@ public static int TestEntryPoint() AssertEqual(Problem2(1111, 0xFFFF_FFFF_0000_0001), 3414328792); AssertEqual(Problem3(1, 0xFFFF_0001), 0); AssertEqual(Problem4(1111, 0xFFFF_FFFF_0000_0001), 3414328792); - AssertEqual(Problem5(1111, double.MaxValue), 3307008522); - AssertEqual(Problem6(1111, float.MaxValue), 3307008522); + AssertEqual(Problem5(1111, double.MaxValue), 1921271346); + AssertEqual(Problem6(1111, float.MaxValue), 1921271346); AssertEqual(Problem5(1111, double.MinValue), 3307008522); AssertEqual(Problem6(1111, float.MinValue), 
3307008522); AssertEqual(Problem5(1111, -0.0), 3307008522); From 5c1ebdbbb5f0eb4f3d21398c16cf633ac6297412 Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Fri, 1 Mar 2024 15:33:05 -0800 Subject: [PATCH 06/14] Merging with main Fixing edge cases --- src/coreclr/jit/morph.cpp | 3 + src/coreclr/jit/utils.cpp | 2 + src/coreclr/jit/valuenum.cpp | 18 +++++ src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 65 +++++-------------- .../ILCompiler.Compiler/Compiler/JitHelper.cs | 16 ++--- src/coreclr/vm/corelib.h | 1 + src/coreclr/vm/ecall.cpp | 4 ++ src/coreclr/vm/jithelpers.cpp | 6 +- .../System.Private.CoreLib/src/System/Math.cs | 16 ++++- 9 files changed, 69 insertions(+), 62 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4b58a8b6899e8..cc195cddd7223 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -447,6 +447,9 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) } else if (srcType == TYP_DOUBLE && dstType == TYP_UINT) { + /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); + tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT); + return fgMorphTree(tree);*/ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); } else if (srcType == TYP_DOUBLE && dstType == TYP_INT) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 945a78333e8ef..9416cf0c061af 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1536,6 +1536,8 @@ void HelperCallProperties::init() case CORINFO_HELP_LMUL: case CORINFO_HELP_LNG2DBL: case CORINFO_HELP_ULNG2DBL: + case CORINFO_HELP_DBL2INT: + case CORINFO_HELP_DBL2UINT: case CORINFO_HELP_DBL2LNG: case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2ULNG: diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index fa2dbfc80168d..2858ab7575c88 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12740,6 +12740,7 @@ void Compiler::fgValueNumberCall(GenTreeCall* call) void 
Compiler::fgValueNumberCastHelper(GenTreeCall* call) { + // printf("\n Deepak fgValueNumberCastHelper IN \n"); CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); var_types castToType = TYP_UNDEF; var_types castFromType = TYP_UNDEF; @@ -12770,6 +12771,16 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) castFromType = TYP_DOUBLE; break; + case CORINFO_HELP_DBL2INT: + castToType = TYP_INT; + castFromType = TYP_DOUBLE; + break; + + case CORINFO_HELP_DBL2UINT: + castToType = TYP_UINT; + castFromType = TYP_DOUBLE; + break; + case CORINFO_HELP_DBL2LNG_OVF: castToType = TYP_LONG; castFromType = TYP_DOUBLE; @@ -13081,6 +13092,8 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); + // printf("\n Deepak Call = [%06u], helpFunc = %d \n", dspTreeID(call), (int)helpFunc); + // printTreeID(call); switch (helpFunc) { @@ -13088,6 +13101,8 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) case CORINFO_HELP_ULNG2DBL: case CORINFO_HELP_DBL2INT_OVF: case CORINFO_HELP_DBL2LNG: + case CORINFO_HELP_DBL2INT: + case CORINFO_HELP_DBL2UINT: case CORINFO_HELP_DBL2LNG_OVF: case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2UINT_OVF: @@ -13117,7 +13132,10 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) break; default: + { + // printf("\n Deepak DEFAULT!! \n"); break; + } } bool pure = s_helperCallProperties.IsPure(helpFunc); diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index cd25f6731b2b0..120c18e48f17c 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -5,53 +5,6 @@ #include "CommonMacros.h" #include "rhassert.h" -// -// Floating point and 64-bit integer math helpers. 
-// - - -EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) -{ -#if defined(HOST_X86) || defined(HOST_AMD64) - const double int64_min = (double)INT64_MIN; - const double int64_max = (double)INT64_MAX; - return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val; -#else - return (int64_t)val; -#endif //HOST_X86 || HOST_AMD64 -} - -EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val) -{ -#if defined(HOST_X86) || defined(HOST_AMD64) - const double int32_min = (double)INT32_MIN - 1.0; - const double int32_max = -2.0 * (double)INT32_MIN; - return (val!= val) ? 0 : (val <= int32_min) ? INT32_MIN : (val >= int32_max) ? INT32_MAX : (int32_t)val; -#else - return (int32_t)val; -#endif //HOST_X86 || HOST_AMD64 -} - -EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val) -{ -#if defined(HOST_X86) || defined(HOST_AMD64) - const double uint32_max_plus_1 = -2.0 * (double)INT32_MIN; - return (val < 0) ? 0 : (val != val || val >= uint32_max_plus_1) ? UINT32_MAX : (uint32_t)val; -#else - return (uint32_t)val; -#endif //HOST_X86 || HOST_AMD64 -} - -EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpFlt2UInt(float val) -{ -#if defined(HOST_X86) || defined(HOST_AMD64) - const float uint32_max_plus_1 = -2.0 * (float)INT32_MIN; - return (val != val || val < 0) ? 0 : (val >= uint32_max_plus_1) ? 
UINT32_MAX : (uint32_t)val; -#else - return (uint32_t)val; -#endif //HOST_X86 || HOST_AMD64 -} - #undef min #undef max #include @@ -129,14 +82,26 @@ EXTERN_C int64_t REDHAWK_CALLCONV RhpLLsh(int64_t i, int32_t j) return i << (j & 0x3f); } +EXTERN_C double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val) +{ + return (double)val; +} + EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) { return (int64_t)val; } -EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val) +#else // HOST_X86 || HOST_AMD64 + +EXTERN_C int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) { - return (double)val; + const double int64_min = (double)INT64_MIN; + const double int64_max = (double)INT64_MAX; + return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val; } +#endif + + + -#endif // HOST_ARM diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index ce321a467b309..31434ca0f979b 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -177,8 +177,15 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.Dbl2ULng: methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToULong"); break; + case ReadyToRunHelper.Dbl2UInt: + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToUInt"); + break; + case ReadyToRunHelper.Dbl2Int: + methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt"); + break; case ReadyToRunHelper.Flt2UInt: - mangledName = "RhpFlt2UInt"; + methodDesc = context.GetHelperEntryPoint("System", "Math", "FloatToUInt"); + //mangledName = "RhpFlt2UInt"; break; case ReadyToRunHelper.Dbl2IntOvf: @@ -200,13 +207,6 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.FltRem: methodDesc = 
context.GetHelperEntryPoint("System", "MathF", "FloatReminder"); break; - - case ReadyToRunHelper.Dbl2Int: - methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt"); - break; - case ReadyToRunHelper.Dbl2UInt: - methodDesc = context.GetHelperEntryPoint("System", "MathF", "DoubleToUInt"); - break; case ReadyToRunHelper.DblRound: DefType doubleType = context.GetWellKnownType(WellKnownType.Double); methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("Round", diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index eef4d60b64b87..a77b7fbd56da1 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -274,6 +274,7 @@ DEFINE_METHOD(MATH, DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverfl DEFINE_METHOD(MATH, DOUBLE_REMINDER, DoubleReminder, NoSig) DEFINE_METHOD(MATH, DOUBLE_TO_INT, DoubleToInt, NoSig) DEFINE_METHOD(MATH, DOUBLE_TO_UINT, DoubleToUInt, NoSig) +DEFINE_METHOD(MATH, FLOAT_TO_UINT, FloatToUInt, NoSig) DEFINE_METHOD(MATH, ROUND, Round, SM_Dbl_RetDbl) DEFINE_CLASS(MATHF, System, MathF) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index fa8cf590c28d6..6c0119a99b3de 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -205,6 +205,10 @@ void ECall::PopulateManagedHelpers() pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__FLOAT_TO_UINT)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_FLT2UINT, pDest); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 092dc342547bb..70c1b30c4f3f3 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -388,8 +388,10 @@ HCIMPLEND HCIMPL1_V(INT64, JIT_Dbl2Lng, double 
val) { FCALL_CONTRACT; - - return (INT64)val; + const double int64_min = (double)INT64_MIN; + const double int64_max = (double)INT64_MAX; + return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val; + //return (INT64)val; } HCIMPLEND diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index 34211170f9c3d..c154343331a03 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1735,7 +1735,19 @@ private static double DoubleReminder(double dividend, double divisor) return FMod(dividend, divisor); } - private static int DoubleToInt(double val) => (int)(long)val; - private static uint DoubleToUInt(double val) => (uint)(long)val; + private static int DoubleToInt(double val) + { + return double.IsNaN(val) || ((long)val < int.MinValue) ? int.MinValue : ((long)val > int.MaxValue) ? int.MaxValue : (int)(long)val; + } + private static uint DoubleToUInt(double val) + { + return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val; + } + + private static uint FloatToUInt(float val) + { + //return 0; + return float.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val; + } } } From 3e006be4308e2d9057f8f1524579b8c1d59d6b1d Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 5 Mar 2024 12:01:23 -0800 Subject: [PATCH 07/14] cleanup morph and FloatToUInt. 
Also, handle negative infinity value when converting to uint/ulong --- docs/design/coreclr/botr/readytorun-format.md | 1 - src/coreclr/inc/corinfo.h | 1 - src/coreclr/inc/jithelpers.h | 1 - src/coreclr/inc/readytorun.h | 1 - src/coreclr/inc/readytorunhelpers.h | 1 - src/coreclr/jit/hwintrinsicxarch.cpp | 8 ++--- src/coreclr/jit/morph.cpp | 32 ++++--------------- src/coreclr/jit/simdashwintrinsic.cpp | 16 +++++----- src/coreclr/jit/utils.cpp | 1 - src/coreclr/jit/valuenum.cpp | 6 ---- .../Internal/Runtime/ReadyToRunConstants.cs | 1 - .../Common/JitInterface/CorInfoHelpFunc.cs | 1 - .../ILCompiler.Compiler/Compiler/JitHelper.cs | 4 --- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 3 -- .../ReadyToRunSignature.cs | 4 --- .../JitInterface/CorInfoImpl.RyuJit.cs | 3 -- src/coreclr/vm/corelib.h | 1 - src/coreclr/vm/ecall.cpp | 4 --- .../System.Private.CoreLib/src/System/Math.cs | 6 ---- 19 files changed, 18 insertions(+), 77 deletions(-) diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md index c222ab299e54b..a9a5c8b916303 100644 --- a/docs/design/coreclr/botr/readytorun-format.md +++ b/docs/design/coreclr/botr/readytorun-format.md @@ -870,7 +870,6 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, - READYTORUN_HELPER_Flt2UInt = 0xD8, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index a935dd2e53f2a..b4417e60af4d5 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -393,7 +393,6 @@ enum CorInfoHelpFunc CORINFO_HELP_DBL2LNG_OVF, CORINFO_HELP_DBL2UINT, // unused CORINFO_HELP_DBL2UINT_OVF, - CORINFO_HELP_FLT2UINT, CORINFO_HELP_DBL2ULNG, CORINFO_HELP_DBL2ULNG_OVF, CORINFO_HELP_FLTREM, diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 8cb878a14a210..e0f65ff5de908 100644 --- a/src/coreclr/inc/jithelpers.h +++ 
b/src/coreclr/inc/jithelpers.h @@ -59,7 +59,6 @@ DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT_OVF, NULL, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_FLT2UINT, NULL, CORINFO_HELP_SIG_8_STACK) //Deepak DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_DBL2ULNG_OVF, NULL, CORINFO_HELP_SIG_8_STACK) DYNAMICJITHELPER(CORINFO_HELP_FLTREM, NULL, CORINFO_HELP_SIG_8_STACK) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 0c84b7f3baef0..41a4aa251fa74 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -399,7 +399,6 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, - READYTORUN_HELPER_Flt2UInt = 0xD8, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index 695d9d886e261..bbb586e8eb4a3 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -84,7 +84,6 @@ HELPER(READYTORUN_HELPER_Dbl2Lng, CORINFO_HELP_DBL2LNG, HELPER(READYTORUN_HELPER_Dbl2LngOvf, CORINFO_HELP_DBL2LNG_OVF, ) HELPER(READYTORUN_HELPER_Dbl2UInt, CORINFO_HELP_DBL2UINT, ) HELPER(READYTORUN_HELPER_Dbl2UIntOvf, CORINFO_HELP_DBL2UINT_OVF, ) -HELPER(READYTORUN_HELPER_Flt2UInt, CORINFO_HELP_FLT2UINT, ) HELPER(READYTORUN_HELPER_Dbl2ULng, CORINFO_HELP_DBL2ULNG, ) HELPER(READYTORUN_HELPER_Dbl2ULngOvf, CORINFO_HELP_DBL2ULNG_OVF, ) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 7f6469f992916..da7f3af42334e 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1524,13 +1524,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // SNAN: 0b1000 // 
ZERO: 0b0000: // +ONE: 0b0000 - // -INF: 0b0000 + // -INF: 0b1000 // +INF: 0b0000 // -VAL: 0b1000: Saturate to Zero // +VAL: 0b0000 for (int i = 0; i < 16; i++) { - tbl->gtSimdVal.i32[i] = 0x08000088; + tbl->gtSimdVal.i32[i] = 0x08080088; } // Generate first operand @@ -1573,13 +1573,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // SNAN: 0b1000 // ZERO: 0b0000: // +ONE: 0b0000 - // -INF: 0b0000 + // -INF: 0b1000 // +INF: 0b0000 // -VAL: 0b1000: Saturate to Zero // +VAL: 0b0000 for (int i = 0; i < 8; i++) { - tbl->gtSimdVal.i64[i] = 0x08000088; + tbl->gtSimdVal.i64[i] = 0x08080088; } // Generate first operand diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cc195cddd7223..7f984051bab11 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -353,11 +353,11 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // SNAN: 0b1000 // ZERO: 0b0000: // +ONE: 0b0000 - // -INF: 0b0000 + // -INF: 0b1000 // +INF: 0b0000 // -VAL: 0b1000: Saturate to Zero // +VAL: 0b0000 - tbl->gtSimdVal.i32[0] = 0x08000088; + tbl->gtSimdVal.i32[0] = 0x08080088; // Generate first operand // The logic is that first and second operand are basically the same because we want @@ -440,22 +440,6 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) return fgMorphTree(saturate_val); } } - // does not work, need to convert into helper function - else if (srcType == TYP_FLOAT && dstType == TYP_UINT) - { - return fgMorphCastIntoHelper(tree, CORINFO_HELP_FLT2UINT, oper); - } - else if (srcType == TYP_DOUBLE && dstType == TYP_UINT) - { - /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); - tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT); - return fgMorphTree(tree);*/ - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); - } - else if (srcType == TYP_DOUBLE && dstType == TYP_INT) - { - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); - } } } while (false); @@ -472,14 +456,13 @@ GenTree* 
Compiler::fgMorphExpandCast(GenTreeCast* tree) #elif defined(TARGET_AMD64) // Amd64: src = float, dst = uint64 or overflow conversion. // This goes through helper and hence src needs to be converted to double. - && (tree->gtOverflow() || ((dstType == TYP_INT || dstType == TYP_ULONG || dstType == TYP_LONG) && - !compOpportunisticallyDependsOn(InstructionSet_AVX512F))) + && (tree->gtOverflow() || !compOpportunisticallyDependsOn(InstructionSet_AVX512F)) #elif defined(TARGET_ARM) // Arm: src = float, dst = int64/uint64 or overflow conversion. && (tree->gtOverflow() || varTypeIsLong(dstType)) #else // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) + && (tree->gtOverflow() || varTypeIsIntegral(dstType)) #endif ) { @@ -519,16 +502,13 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) case TYP_UINT: #if defined(TARGET_ARM) return nullptr; -#else // TARGET_X86 +#elif defined(TARGET_XARCH) if (tree->IsSaturatedConversion()) { return nullptr; } - /*oper = gtNewCastNode(TYP_LONG, oper, false, TYP_LONG); - tree = gtNewCastNode(TYP_INT, oper, false, TYP_UINT); - return fgMorphTree(tree);*/ +#endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); -#endif // TARGET_X86 case TYP_LONG: #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index d1e157ab8dbef..1394501f7da9b 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1256,17 +1256,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // The behavior we want is to saturate negative values to 0. 
GenTreeVecCon* tbl = gtNewVconNode(simdType); - // QNAN: 0b0000: - // SNAN: 0b0000 + // QNAN: 0b1000: + // SNAN: 0b1000 // ZERO: 0b0000: // +ONE: 0b0000 - // -INF: 0b0000 + // -INF: 0b1000 // +INF: 0b0000 // -VAL: 0b1000: Saturate to Zero // +VAL: 0b0000 for (int i = 0; i < 16; i++) { - tbl->gtSimdVal.i32[i] = 0x08000088; + tbl->gtSimdVal.i32[i] = 0x08080088; } // Generate first operand @@ -1302,17 +1302,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // The behavior we want is to saturate negative values to 0. GenTreeVecCon* tbl = gtNewVconNode(simdType); - // QNAN: 0b0000: - // SNAN: 0b0000 + // QNAN: 0b1000: + // SNAN: 0b1000 // ZERO: 0b0000: // +ONE: 0b0000 - // -INF: 0b0000 + // -INF: 0b1000 // +INF: 0b0000 // -VAL: 0b1000: Saturate to Zero // +VAL: 0b0000 for (int i = 0; i < 8; i++) { - tbl->gtSimdVal.i64[i] = 0x08000088; + tbl->gtSimdVal.i64[i] = 0x08080088; } // Generate first operand diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 9416cf0c061af..dc55e449c0ed3 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1539,7 +1539,6 @@ void HelperCallProperties::init() case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2UINT: case CORINFO_HELP_DBL2LNG: - case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_FLTREM: case CORINFO_HELP_DBLREM: diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 2858ab7575c88..a5a3710d161ca 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12787,11 +12787,6 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) hasOverflowCheck = true; break; - case CORINFO_HELP_FLT2UINT: - castToType = TYP_UINT; - castFromType = TYP_FLOAT; - break; - case CORINFO_HELP_DBL2UINT_OVF: castToType = TYP_UINT; castFromType = TYP_DOUBLE; @@ -13104,7 +13099,6 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2UINT: case 
CORINFO_HELP_DBL2LNG_OVF: - case CORINFO_HELP_FLT2UINT: case CORINFO_HELP_DBL2UINT_OVF: case CORINFO_HELP_DBL2ULNG: case CORINFO_HELP_DBL2ULNG_OVF: diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 4e8a5bba4d5a8..a37945534865b 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -311,7 +311,6 @@ public enum ReadyToRunHelper Dbl2UIntOvf = 0xD5, Dbl2ULng = 0xD6, Dbl2ULngOvf = 0xD7, - Flt2UInt = 0xD8, // Floating point ops DblRem = 0xE0, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 547ac19096cca..5346806c1aff6 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -36,7 +36,6 @@ public enum CorInfoHelpFunc CORINFO_HELP_DBL2LNG_OVF, CORINFO_HELP_DBL2UINT, CORINFO_HELP_DBL2UINT_OVF, - CORINFO_HELP_FLT2UINT, CORINFO_HELP_DBL2ULNG, CORINFO_HELP_DBL2ULNG_OVF, CORINFO_HELP_FLTREM, diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 31434ca0f979b..852d859b5a740 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -183,10 +183,6 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.Dbl2Int: methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToInt"); break; - case ReadyToRunHelper.Flt2UInt: - methodDesc = context.GetHelperEntryPoint("System", "Math", "FloatToUInt"); - //mangledName = "RhpFlt2UInt"; - break; case ReadyToRunHelper.Dbl2IntOvf: methodDesc = context.GetHelperEntryPoint("System", "Math", "DoubleToIntOverflow"); diff --git 
a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 0524c68181cae..ad83b1eb42a5d 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1150,9 +1150,6 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: id = ReadyToRunHelper.Dbl2UInt; break; - case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT: - id = ReadyToRunHelper.Flt2UInt; - break; case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: id = ReadyToRunHelper.Dbl2UIntOvf; break; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 022d17c662257..0eae2f10cb8f0 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1909,10 +1909,6 @@ private void ParseHelper(StringBuilder builder) builder.Append("DBL2UINT"); break; - case ReadyToRunHelper.Flt2UInt: - builder.Append("FLT2UINT"); - break; - case ReadyToRunHelper.Dbl2UIntOvf: builder.Append("DBL2UINTOVF"); break; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 4484b90c0c035..1d2f977c4c167 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -678,9 +678,6 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: id = ReadyToRunHelper.Dbl2UInt; break; - case CorInfoHelpFunc.CORINFO_HELP_FLT2UINT: - id = ReadyToRunHelper.Flt2UInt; - break; 
case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: id = ReadyToRunHelper.Dbl2UIntOvf; break; diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index a77b7fbd56da1..eef4d60b64b87 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -274,7 +274,6 @@ DEFINE_METHOD(MATH, DOUBLE_TO_ULONG_OVERFLOW,DoubleToULongOverfl DEFINE_METHOD(MATH, DOUBLE_REMINDER, DoubleReminder, NoSig) DEFINE_METHOD(MATH, DOUBLE_TO_INT, DoubleToInt, NoSig) DEFINE_METHOD(MATH, DOUBLE_TO_UINT, DoubleToUInt, NoSig) -DEFINE_METHOD(MATH, FLOAT_TO_UINT, FloatToUInt, NoSig) DEFINE_METHOD(MATH, ROUND, Round, SM_Dbl_RetDbl) DEFINE_CLASS(MATHF, System, MathF) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 6c0119a99b3de..fa8cf590c28d6 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -205,10 +205,6 @@ void ECall::PopulateManagedHelpers() pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_DBL2UINT, pDest); - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__FLOAT_TO_UINT)); - pDest = pMD->GetMultiCallableAddrOfCode(); - SetJitHelperFunction(CORINFO_HELP_FLT2UINT, pDest); - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest); diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index c154343331a03..d4073a5691911 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1743,11 +1743,5 @@ private static uint DoubleToUInt(double val) { return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? uint.MaxValue : (uint)(ulong)val; } - - private static uint FloatToUInt(float val) - { - //return 0; - return float.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? 
uint.MaxValue : (uint)(ulong)val; - } } } From 2cc89d772f7e08e2d0e573da8ae7dfbcd2b92b4b Mon Sep 17 00:00:00 2001 From: Deepak Rajendrakumaran Date: Tue, 5 Mar 2024 17:07:06 -0800 Subject: [PATCH 08/14] Handling NaN for double->ulong --- src/libraries/System.Private.CoreLib/src/System/Math.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index d4073a5691911..371f72d7bf099 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1624,6 +1624,9 @@ private static double ULongToDouble(ulong val) private static ulong DoubleToULong(double val) { + if (double.IsNaN(val)) + return 0; + const double two63 = 2147483648.0 * 4294967296.0; ulong ret; if (val < two63) From 51bad69fcd7fd6a7b6a3c2c47994272c56023d85 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 6 Mar 2024 02:17:24 -0800 Subject: [PATCH 09/14] Move System.Math.DoubleToUlong to saturating behavior --- .../System.Private.CoreLib/src/System/Math.cs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index 371f72d7bf099..a0d5c007c9887 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1627,18 +1627,16 @@ private static ulong DoubleToULong(double val) if (double.IsNaN(val)) return 0; - const double two63 = 2147483648.0 * 4294967296.0; - ulong ret; - if (val < two63) + const double two64 = 4294967296.0 * 4294967296; + if (val <= 0.0) { - ret = (ulong)(long)val; + return 0; } - else + if (val >= two64) { - // subtract 0x8000000000000000, do the convert then add it back again - ret = (ulong)(long)(val - two63) + 0x8000000000000000UL; + return ulong.MaxValue; } - return ret; + return 
(ulong)(long)val; } [StackTraceHidden] From 724be9131d28b6816848d183ccec4094c70d63c3 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 6 Mar 2024 15:23:15 -0800 Subject: [PATCH 10/14] Resolving non AVX512F failure --- .../System.Private.CoreLib/src/System/Math.cs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index a0d5c007c9887..c2967b21e30cc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1624,19 +1624,27 @@ private static double ULongToDouble(ulong val) private static ulong DoubleToULong(double val) { + const double two63 = 2147483648.0 * 4294967296.0; +#if TARGET_X86 || TARGET_AMD64 if (double.IsNaN(val)) return 0; - const double two64 = 4294967296.0 * 4294967296; + const double two64 = 4294967296.0 * 4294967296.0; if (val <= 0.0) { return 0; } - if (val >= two64) + else if (val >= two64) { return ulong.MaxValue; } - return (ulong)(long)val; +#endif //TARGET_X86 || TARGET_AMD64 + if (val < two63) + { + return (ulong)(long)val; + } + // subtract 0x8000000000000000, do the convert then add it back again + return (ulong)(long)(val - two63) + 0x8000000000000000UL; } [StackTraceHidden] From 46eb9c861f120439394d9cdd58c3d1c441a98150 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 6 Mar 2024 20:57:50 -0800 Subject: [PATCH 11/14] Fix DoubleToUint failures and also add doubleToUint helper in checks --- src/coreclr/vm/jitinterface.cpp | 2 ++ src/libraries/System.Private.CoreLib/src/System/Math.cs | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index dbf4aa43de507..cb3eba7360c25 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10697,6 +10697,8 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* 
IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULMUL_OVF || dynamicFtnNum == DYNAMIC_CORINFO_HELP_ULNG2DBL || dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2ULNG || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT || dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2INT_OVF || dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2UINT_OVF || dynamicFtnNum == DYNAMIC_CORINFO_HELP_DBL2LNG_OVF || diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index c2967b21e30cc..a4998d0784e1e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -1744,10 +1744,13 @@ private static double DoubleReminder(double dividend, double divisor) return FMod(dividend, divisor); } + [StackTraceHidden] private static int DoubleToInt(double val) { - return double.IsNaN(val) || ((long)val < int.MinValue) ? int.MinValue : ((long)val > int.MaxValue) ? int.MaxValue : (int)(long)val; + return double.IsNaN(val) ? 0 : (val < -2147483648.0) ? int.MinValue : (val >= 2147483648.0) ? int.MaxValue : (int)(long)val; } + + [StackTraceHidden] private static uint DoubleToUInt(double val) { return double.IsNaN(val) || (val < 0) ? 0 : ((ulong)val > uint.MaxValue) ? 
uint.MaxValue : (uint)(ulong)val; From 4ca8a860ce7f37b3874b7f4c8c01cb55a0991512 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 7 Mar 2024 00:14:50 -0800 Subject: [PATCH 12/14] Fix arm failure and nativeAOT --- .../IL/ILImporter.Scanner.cs | 17 +++++++++++++++++ src/coreclr/vm/jithelpers.cpp | 7 +++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs index e152de9cb404a..9d172a763c76e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs @@ -1273,11 +1273,19 @@ private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool { case WellKnownType.SByte: case WellKnownType.Int16: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf"); + } + break; case WellKnownType.Int32: if (checkOverflow) { _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2IntOvf), "conv_i4_ovf"); } + else{ + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2Int), "conv_i4"); + } break; case WellKnownType.Int64: if (checkOverflow) @@ -1287,11 +1295,20 @@ private void ImportConvert(WellKnownType wellKnownType, bool checkOverflow, bool break; case WellKnownType.Byte: case WellKnownType.UInt16: + if (checkOverflow) + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u8_ovf"); + } + break; case WellKnownType.UInt32: if (checkOverflow) { _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UIntOvf), "conv_u8_ovf"); } + else + { + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Dbl2UInt), "conv_u4"); + } break; case WellKnownType.UInt64: if (checkOverflow) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 70c1b30c4f3f3..7e6118b3a8149 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ 
b/src/coreclr/vm/jithelpers.cpp @@ -388,10 +388,13 @@ HCIMPLEND HCIMPL1_V(INT64, JIT_Dbl2Lng, double val) { FCALL_CONTRACT; +#if defined(TARGET_X86) || defined(TARGET_AMD64) const double int64_min = (double)INT64_MIN; const double int64_max = (double)INT64_MAX; - return (val!= val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val; - //return (INT64)val; + return (val != val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (INT64)val; +#else // TARGET_ARM + return (INT64)val; +#endif //TARGET_X86 || TARGET_AMD64 } HCIMPLEND From d47fd37f89d21faf174281d42e7eb81fbd5db3a0 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 7 Mar 2024 19:15:23 -0800 Subject: [PATCH 13/14] Add gtSimdCvtNode to cleanup the code and enable new implementation in X86 --- src/coreclr/jit/compiler.h | 12 ++ src/coreclr/jit/gentree.cpp | 123 +++++++++++++++ src/coreclr/jit/hwintrinsicxarch.cpp | 185 ++--------------------- src/coreclr/jit/simdashwintrinsic.cpp | 206 +++----------------------- 4 files changed, 170 insertions(+), 356 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3e789dc3a474c..aebb563ca3c8d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3172,6 +3172,18 @@ class Compiler GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize); + + GenTree* gtNewSimdCvtNode(var_types type, + GenTree* op1, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdTargetBaseJitType, + CorInfoType simdSourceBaseJitType, + unsigned simdSize); + + GenTreeVecCon* gtCvtCtrlTbl(var_types type, + var_types sourceType, + var_types targetType, + unsigned simdSize); GenTree* gtNewSimdCreateBroadcastNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ffb461ec4d829..0a3a8e89a8c4a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21251,6 +21251,129 
@@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } +#if defined(TARGET_XARCH) +GenTreeVecCon* Compiler::gtCvtCtrlTbl + (var_types type, var_types sourceType, var_types targetType, unsigned simdSize) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(varTypeIsFloating(sourceType)); + assert(varTypeIsIntegral(targetType)); + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + + GenTreeVecCon* tbl = gtNewVconNode(type); + + switch (sourceType) + { + case TYP_DOUBLE: + switch (targetType) + { + case TYP_UINT: + case TYP_ULONG: + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x08080088; + } + break; + + case TYP_INT: + case TYP_LONG: + for (int i = 0; i < 8; i++) + { + tbl->gtSimdVal.i64[i] = 0x00000088; + } + break; + + default: + unreached(); + } + break; + + case TYP_FLOAT: + switch (targetType) + { + case TYP_UINT: + case TYP_ULONG: + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x08080088; + } + break; + + case TYP_INT: + case TYP_LONG: + for (int i = 0; i < 16; i++) + { + tbl->gtSimdVal.i32[i] = 0x00000088; + } + break; + + default: + unreached(); + } + break; + + default: + unreached(); + } + return tbl; +} + +GenTree* Compiler::gtNewSimdCvtNode + (var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, CorInfoType simdTargetBaseJitType, CorInfoType simdSourceBaseJitType, unsigned simdSize) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + var_types simdSourceBaseType = JitType2PreciseVarType(simdSourceBaseJitType); + var_types simdTargetBaseType = JitType2PreciseVarType(simdTargetBaseJitType); + 
assert(varTypeIsArithmetic(simdSourceBaseType)); + assert(varTypeIsArithmetic(simdTargetBaseType)); + assert(varTypeIsFloating(simdSourceBaseType)); + assert(varTypeIsIntegral(simdTargetBaseType)); + + GenTreeVecCon* tbl = gtCvtCtrlTbl(type, simdSourceBaseType, simdTargetBaseType, simdSize); + + GenTree* op1Clone = fgMakeMultiUse(&op1); + + // run vfixupimmsd base on table and no flags reporting + GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0), + NI_AVX512F_Fixup, simdSourceBaseJitType, simdSize); + + if (varTypeIsSigned(simdTargetBaseType)) + { + ssize_t actualMaxVal = (varTypeIsInt(simdTargetBaseType)) ? INT32_MAX : INT64_MAX; + + GenTree* maxVal = gtNewDconNode(static_cast(actualMaxVal), simdSourceBaseType); + + maxVal = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize); + GenTree* maxValDup = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal, simdTargetBaseType), simdTargetBaseJitType, simdSize); + + // we will be using the input value twice + GenTree* fixupValDup = fgMakeMultiUse(&fixupVal); + + // usage 1 --> compare with max value of integer + fixupVal = gtNewSimdCmpOpNode(GT_GE, type, fixupVal, maxVal, simdSourceBaseJitType, simdSize); + // cast it + + GenTree* castNode = gtNewSimdHWIntrinsicNode(type, fixupValDup, hwIntrinsicID, simdSourceBaseJitType, simdSize); + + // usage 2 --> use thecompared mask with input value and max value to blend + return gtNewSimdCndSelNode(type, fixupVal, maxValDup, castNode, simdTargetBaseJitType, simdSize); + } + else + { + return gtNewSimdHWIntrinsicNode(type, fixupVal, hwIntrinsicID, simdSourceBaseJitType, simdSize); + } +} +#endif //TARGET_XARCH + GenTree* Compiler::gtNewSimdCmpOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index da7f3af42334e..3b9236ec8906e 100644 --- 
a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1446,61 +1446,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); -#ifdef TARGET_AMD64 if (IsBaselineVector512IsaSupportedOpportunistically()) { op1 = impSIMDPopStack(); - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: Saturate to Zero - // SNAN: 0b1000: Saturate to Zero - // ZERO: 0b0000 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - for (int i = 0; i < 8; i++) - { - tbl->gtSimdVal.i64[i] = 0x00000088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - - // run vfixupimmsd base on table and no flags reporting - GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - - GenTree* max_val = - gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast(INT64_MAX)), - simdBaseJitType, simdSize); - GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG), - CORINFO_TYPE_LONG, simdSize); - // we will be using the input value twice - GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); - - // usage 1 --> compare with max value of integer - saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); - // cast it - intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation - : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; - retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); - - // usage 2 --> use thecompared mask with input value and max value to blend - retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, simdSize); + retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 break; } @@ -1509,47 +1464,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_ConvertToUInt32: { assert(sig->numArgs == 1); - assert(varTypeIsFloating(simdBaseType)); -#ifdef TARGET_AMD64 + assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { op1 = impSIMDPopStack(); - - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: - // SNAN: 0b1000 - // ZERO: 0b0000: - // +ONE: 0b0000 - // -INF: 0b1000 - // +INF: 0b0000 - // -VAL: 0b1000: Saturate to Zero - // +VAL: 0b0000 - for (int i = 0; i < 16; i++) - { - tbl->gtSimdVal.i32[i] = 0x08080088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - - // run vfixupimmsd base on table and no flags reporting - GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - intrinsic = (simdSize == 16) ? 
NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; - retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 break; } @@ -1559,46 +1483,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); -#ifdef TARGET_AMD64 if (IsBaselineVector512IsaSupportedOpportunistically()) { op1 = impSIMDPopStack(); - - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: - // SNAN: 0b1000 - // ZERO: 0b0000: - // +ONE: 0b0000 - // -INF: 0b1000 - // +INF: 0b0000 - // -VAL: 0b1000: Saturate to Zero - // +VAL: 0b0000 - for (int i = 0; i < 8; i++) - { - tbl->gtSimdVal.i64[i] = 0x08080088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - - // run vfixupimmsd base on table and no flags reporting - GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation - : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; - retNode = gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 break; } @@ -1608,63 +1501,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_FLOAT); -#ifdef TARGET_AMD64 if (IsBaselineVector512IsaSupportedOpportunistically()) { op1 = impSIMDPopStack(); - - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: Saturate to Zero - // SNAN: 0b1000: Saturate to Zero - // ZERO: 0b0000 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - for (int i = 0; i < 16; i++) - { - tbl->gtSimdVal.i32[i] = 0x00000088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - // GenTree* op2Clone; - // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, - // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); - - // run vfixupimmsd base on table and no flags reporting - GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - - GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast(INT32_MAX)), - simdBaseJitType, simdSize); - GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT), - CORINFO_TYPE_INT, simdSize); - // we will be using the input value twice - GenTree* 
saturate_valDup = fgMakeMultiUse(&saturate_val); - - // usage 1 --> compare with max value of integer - saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); - // cast it - intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation - : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation - : NI_AVX512F_ConvertToVector512Int32WithTruncation; - - retNode = gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); + : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; - // usage 2 --> use thecompared mask with input value and max value to blend - retNode = gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, simdSize); + retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 break; } @@ -1693,8 +1538,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) { intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single - : NI_AVX512F_ConvertToVector512Single; + : (simdSize == 32) ? 
NI_AVX512F_VL_ConvertToVector256Single + : NI_AVX512F_ConvertToVector512Single; } if (intrinsic != NI_Illegal) { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 1394501f7da9b..7cdad95169262 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -532,12 +532,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_VectorT_ConvertToUInt32: case NI_VectorT_ConvertToUInt64: { -#ifdef TARGET_AMD64 +#ifdef TARGET_XARCH if (IsBaselineVector512IsaSupportedOpportunistically()) { break; } -#endif // TARGET_AMD64 +#endif // TARGET_XARCH return nullptr; } @@ -1181,224 +1181,58 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); -#ifdef TARGET_AMD64 if (IsBaselineVector512IsaSupportedOpportunistically()) { - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: Saturate to Zero - // SNAN: 0b1000: Saturate to Zero - // ZERO: 0b0000 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - for (int i = 0; i < 8; i++) - { - tbl->gtSimdVal.i64[i] = 0x00000088; - } + NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - // GenTree* op2Clone; - // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, - // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); - - // run vfixupimmsd base on table and no flags reporting - GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - - GenTree* max_val = - gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeD(static_cast(INT64_MAX)), - simdBaseJitType, simdSize); - GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG), - CORINFO_TYPE_LONG, simdSize); - // we will be using the input value twice - GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); - - // usage 1 --> compare with max value of integer - saturate_val = - gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); - // cast it - - NamedIntrinsic intrinsic = - (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation - : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; - - GenTree* retNode = - gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); - - // usage 2 --> use thecompared mask with input value and max value to blend - return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_LONG, - simdSize); + return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 return nullptr; } case NI_VectorT_ConvertToUInt32: { assert(sig->numArgs == 1); - assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT)); -#ifdef TARGET_AMD64 + assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: - // SNAN: 0b1000 - // ZERO: 0b0000: - // +ONE: 0b0000 - // -INF: 0b1000 - // +INF: 0b0000 - // -VAL: 0b1000: Saturate to Zero - // +VAL: 0b0000 - for (int i = 0; i < 16; i++) - { - tbl->gtSimdVal.i32[i] = 0x08080088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - - // run vfixupimmsd base on table and no flags reporting - GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - - intrinsic = (simdSize == 16) - ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + NamedIntrinsic intrinsic = (simdSize == 16) ? 
NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation + : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; - return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 return nullptr; } case NI_VectorT_ConvertToUInt64: { assert(sig->numArgs == 1); - assert((simdBaseType == TYP_DOUBLE) || (simdBaseType == TYP_FLOAT)); -#ifdef TARGET_AMD64 + assert(simdBaseType == TYP_DOUBLE); if (IsBaselineVector512IsaSupportedOpportunistically()) { - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: - // SNAN: 0b1000 - // ZERO: 0b0000: - // +ONE: 0b0000 - // -INF: 0b1000 - // +INF: 0b0000 - // -VAL: 0b1000: Saturate to Zero - // +VAL: 0b0000 - for (int i = 0; i < 8; i++) - { - tbl->gtSimdVal.i64[i] = 0x08080088; - } - - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - - // run vfixupimmsd base on table and no flags reporting - GenTree* retNode1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); + NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; - intrinsic = (simdSize == 16) - ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation - : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; - - return gtNewSimdHWIntrinsicNode(retType, retNode1, intrinsic, simdBaseJitType, simdSize); + return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 return nullptr; } case NI_VectorT_ConvertToInt32: { assert(simdBaseType == TYP_FLOAT); -#ifdef TARGET_AMD64 if (IsBaselineVector512IsaSupportedOpportunistically()) { - var_types simdType = getSIMDTypeForSize(simdSize); - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = gtNewVconNode(simdType); - - // QNAN: 0b1000: Saturate to Zero - // SNAN: 0b1000: Saturate to Zero - // ZERO: 0b0000 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - for (int i = 0; i < 16; i++) - { - tbl->gtSimdVal.i32[i] = 0x00000088; - } + NamedIntrinsic intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation + : (simdSize == 32) ? 
NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; - // Generate first operand - // The logic is that first and second operand are basically the same because we want - // the output to be in the same xmm register - // Hence we clone the first operand - GenTree* op2Clone = fgMakeMultiUse(&op1); - // GenTree* op2Clone; - // op1 = impCloneExpr(op1, &op2Clone, CHECK_SPILL_ALL, - // nullptr DEBUGARG("Cloning double for Dbl2Ulng conversion")); - - // run vfixupimmsd base on table and no flags reporting - GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, simdBaseJitType, simdSize); - - GenTree* max_val = - gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast(INT32_MAX)), - simdBaseJitType, simdSize); - GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT32_MAX, TYP_INT), - CORINFO_TYPE_INT, simdSize); - // we will be using the input value twice - GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val); - - // usage 1 --> compare with max value of integer - saturate_val = - gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize); - // cast it - - NamedIntrinsic intrinsic = - (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation - : (simdSize == 32) ? 
NI_AVX_ConvertToVector256Int32WithTruncation - : NI_AVX512F_ConvertToVector512Int32WithTruncation; - - GenTree* retNode = - gtNewSimdHWIntrinsicNode(retType, saturate_valDup, intrinsic, simdBaseJitType, simdSize); - - // usage 2 --> use thecompared mask with input value and max value to blend - return gtNewSimdCndSelNode(simdType, saturate_val, max_valDup, retNode, CORINFO_TYPE_INT, - simdSize); + return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize); } -#endif // TARGET_AMD64 return nullptr; } From 71201e2f0dea3ff76a816b41ca14c3119b4e53de Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 8 Mar 2024 01:17:55 -0800 Subject: [PATCH 14/14] Enable the accelerated instructions for conversion for X86 --- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/gentree.cpp | 54 +++++++++++++++++---------- src/coreclr/jit/hwintrinsicxarch.cpp | 27 +++++++------- src/coreclr/jit/simdashwintrinsic.cpp | 28 ++++++++------ 4 files changed, 66 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index aebb563ca3c8d..d5d4f01ccb59d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3172,7 +3172,7 @@ class Compiler GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize); - +#if defined(TARGET_XARCH) GenTree* gtNewSimdCvtNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, @@ -3184,7 +3184,7 @@ class Compiler var_types sourceType, var_types targetType, unsigned simdSize); - +#endif //TARGET_XARCH GenTree* gtNewSimdCreateBroadcastNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0a3a8e89a8c4a..1ecf5058ea7fe 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21252,8 +21252,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s } #if defined(TARGET_XARCH) -GenTreeVecCon* 
Compiler::gtCvtCtrlTbl - (var_types type, var_types sourceType, var_types targetType, unsigned simdSize) +GenTreeVecCon* Compiler::gtCvtCtrlTbl(var_types type, var_types sourceType, var_types targetType, unsigned simdSize) { assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(IsBaselineVector512IsaSupportedDebugOnly()); @@ -21262,7 +21261,6 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); - GenTreeVecCon* tbl = gtNewVconNode(type); switch (sourceType) @@ -21277,7 +21275,7 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl tbl->gtSimdVal.i64[i] = 0x08080088; } break; - + case TYP_INT: case TYP_LONG: for (int i = 0; i < 8; i++) @@ -21285,12 +21283,12 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl tbl->gtSimdVal.i64[i] = 0x00000088; } break; - + default: unreached(); } break; - + case TYP_FLOAT: switch (targetType) { @@ -21301,7 +21299,7 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl tbl->gtSimdVal.i32[i] = 0x08080088; } break; - + case TYP_INT: case TYP_LONG: for (int i = 0; i < 16; i++) @@ -21309,20 +21307,24 @@ GenTreeVecCon* Compiler::gtCvtCtrlTbl tbl->gtSimdVal.i32[i] = 0x00000088; } break; - + default: unreached(); } break; - + default: unreached(); } return tbl; } -GenTree* Compiler::gtNewSimdCvtNode - (var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, CorInfoType simdTargetBaseJitType, CorInfoType simdSourceBaseJitType, unsigned simdSize) +GenTree* Compiler::gtNewSimdCvtNode(var_types type, + GenTree* op1, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdTargetBaseJitType, + CorInfoType simdSourceBaseJitType, + unsigned simdSize) { assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(IsBaselineVector512IsaSupportedDebugOnly()); @@ -21343,17 +21345,29 @@ GenTree* Compiler::gtNewSimdCvtNode GenTree* op1Clone = fgMakeMultiUse(&op1); // run vfixupimmsd base on table and no flags reporting - GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0), - NI_AVX512F_Fixup, 
simdSourceBaseJitType, simdSize); + GenTree* fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tbl, gtNewIconNode(0), NI_AVX512F_Fixup, + simdSourceBaseJitType, simdSize); if (varTypeIsSigned(simdTargetBaseType)) { - ssize_t actualMaxVal = (varTypeIsInt(simdTargetBaseType)) ? INT32_MAX : INT64_MAX; - - GenTree* maxVal = gtNewDconNode(static_cast(actualMaxVal), simdSourceBaseType); - - maxVal = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize); - GenTree* maxValDup = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal, simdTargetBaseType), simdTargetBaseJitType, simdSize); + GenTree* maxVal; + GenTree* maxValDup; + if (varTypeIsLong(simdTargetBaseType)) + { + long long actualMaxVal = INT64_MAX; + maxVal = gtNewDconNode(static_cast(actualMaxVal), simdSourceBaseType); + maxVal = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize); + maxValDup = + gtNewSimdCreateBroadcastNode(type, gtNewLconNode(actualMaxVal), simdTargetBaseJitType, simdSize); + } + else + { + ssize_t actualMaxVal = INT32_MAX; + maxVal = gtNewDconNode(static_cast(actualMaxVal), simdSourceBaseType); + maxVal = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize); + maxValDup = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal, simdTargetBaseType), + simdTargetBaseJitType, simdSize); + } // we will be using the input value twice GenTree* fixupValDup = fgMakeMultiUse(&fixupVal); @@ -21372,7 +21386,7 @@ GenTree* Compiler::gtNewSimdCvtNode return gtNewSimdHWIntrinsicNode(type, fixupVal, hwIntrinsicID, simdSourceBaseJitType, simdSize); } } -#endif //TARGET_XARCH +#endif // TARGET_XARCH GenTree* Compiler::gtNewSimdCmpOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 3b9236ec8906e..b4a8e6495b1ff 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ 
b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1451,8 +1451,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation - : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); } @@ -1467,13 +1467,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { - op1 = impSIMDPopStack(); + op1 = impSIMDPopStack(); intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); } +# break; } @@ -1485,10 +1486,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_DOUBLE); if (IsBaselineVector512IsaSupportedOpportunistically()) { - op1 = impSIMDPopStack(); + op1 = impSIMDPopStack(); intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation - : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); } @@ -1503,10 +1504,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { - op1 = impSIMDPopStack(); + op1 = impSIMDPopStack(); intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation - : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation - : NI_AVX512F_ConvertToVector512Int32WithTruncation; + : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; retNode = gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize); } @@ -1538,8 +1539,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) { intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128Single - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single - : NI_AVX512F_ConvertToVector512Single; + : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single + : NI_AVX512F_ConvertToVector512Single; } if (intrinsic != NI_Illegal) { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 7cdad95169262..bad5f1c1111ed 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1183,9 +1183,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_DOUBLE); if (IsBaselineVector512IsaSupportedOpportunistically()) { - NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation - : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation + : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation + : NI_AVX512DQ_ConvertToVector512Int64WithTruncation; return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); } @@ -1198,9 +1199,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { - NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation - : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_ConvertToVector512UInt32WithTruncation; + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation + : (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation + : NI_AVX512F_ConvertToVector512UInt32WithTruncation; return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); } @@ -1213,9 +1215,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_DOUBLE); if (IsBaselineVector512IsaSupportedOpportunistically()) { - NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation - : (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation + : (simdSize == 32) ? 
NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation + : NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); } @@ -1227,9 +1230,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(simdBaseType == TYP_FLOAT); if (IsBaselineVector512IsaSupportedOpportunistically()) { - NamedIntrinsic intrinsic = (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation - : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation - : NI_AVX512F_ConvertToVector512Int32WithTruncation; + NamedIntrinsic intrinsic = + (simdSize == 16) ? NI_SSE2_ConvertToVector128Int32WithTruncation + : (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation + : NI_AVX512F_ConvertToVector512Int32WithTruncation; return gtNewSimdCvtNode(retType, op1, intrinsic, CORINFO_TYPE_INT, simdBaseJitType, simdSize); }