From cef20486b047f853faea329923678d4878184605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 3 Jun 2019 11:33:19 +0200 Subject: [PATCH] Allow pregenerating all HW intrinsics in CoreLib This is a follow-up to #24689 that lets us pregenerate all hardware intrinsics in CoreLib. We ensure the potentially unsupported code will never be reachable at runtime on CPUs that don't support it by not reporting the `IsSupported` property as intrinsic in crossgen. This ensures the support checks are always JITted. JITting the support checks is very cheap. There is cost in the form of an extra call and failure to do constant propagation of the return value, but the cost is negligible in practice and gets eliminated once the tiered JIT tiers the method up. We only do this in CoreLib because user code could technically not guard intrinsic use in `IsSupported` checks and pregenerating the code could lead to illegal instruction traps at runtime (instead of `PlatformNotSupportedException` throws) - it's a bad user experience. --- src/vm/methodtablebuilder.cpp | 6 ++--- src/zap/zapinfo.cpp | 49 +++++++++++++++++++++++++++++++++-- src/zap/zapper.cpp | 25 ++++++++++++++++-- 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index 31c4b0a5ee8e..61fd1deba1aa 100644 --- a/src/vm/methodtablebuilder.cpp +++ b/src/vm/methodtablebuilder.cpp @@ -1520,15 +1520,13 @@ MethodTableBuilder::BuildMethodTableThrowing( { #if defined(CROSSGEN_COMPILE) #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - if ((!IsNgenPDBCompilationProcess() + if (!IsNgenPDBCompilationProcess() && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule()) - || (strcmp(className, "Sse") != 0 && strcmp(className, "Sse2") != 0)) #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) { // Disable AOT compiling for managed implementation of hardware intrinsics. 
// We specially treat them here to ensure correct ISA features are set during compilation - // The only exception to this rule are SSE and SSE2 intrinsics in CoreLib - we can - // safely expand those because we require them to be always available. + // The only exception to this rule are intrinsics in CoreLib. COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED); } #endif // defined(CROSSGEN_COMPILE) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 23fb3362c864..2edc73325643 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -441,7 +441,7 @@ void ZapInfo::CompileMethod() // this they can add the hint and reduce the perf cost at runtime. m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle); - DWORD methodAttribs = getMethodAttribs(m_currentMethodHandle); + DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle); if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT) { // Skip methods marked with MethodImplOptions.AggressiveOptimization, they will be jitted instead. In the future, @@ -450,6 +450,24 @@ void ZapInfo::CompileMethod() return; } +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + if (methodAttribs & CORINFO_FLG_JIT_INTRINSIC) + { + // Skip generating hardware intrinsic method bodies. + // + // The actual method bodies are only reachable via reflection/delegates, but we don't know what the + // implementation should do (whether it can do the actual intrinsic thing, or whether it should throw + // a PlatformNotSupportedException). 
+ + const char* namespaceName; + getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr); + if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) + { + return; + } + } +#endif + m_jitFlags = ComputeJitFlags(m_currentMethodHandle); #ifdef FEATURE_READYTORUN_COMPILER @@ -2086,6 +2104,30 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction, *pbIndirectedHandles = TRUE; } +DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) +{ +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + if (attribs & CORINFO_FLG_JIT_INTRINSIC) + { + // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular + // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen + // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct + // answer for the CPU the code is running on. + + const char* namespaceName; + const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, nullptr, &namespaceName, nullptr); + + if (strcmp(methodName, "get_IsSupported") == 0 + && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) + { + attribs &= ~CORINFO_FLG_JIT_INTRINSIC; + } + } +#endif + + return attribs; +} + //return a callable stub that will do the virtual or interface call @@ -2111,6 +2153,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY), pResult); + pResult->methodFlags = FilterMethodAttribsForIsSupported(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo); + #ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation()) { @@ -3681,7 +3725,8 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn) DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn) { - return m_pEEJitInfo->getMethodAttribs(ftn); + DWORD result = m_pEEJitInfo->getMethodAttribs(ftn); + return 
FilterMethodAttribsForIsSupported(result, ftn, m_pEEJitInfo); } void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs) diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 5e5d190346ee..22102faac0d2 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -1189,11 +1189,32 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) #endif // _TARGET_X86_ #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some - // of the hardware intrinsics. + // If we're crossgenning CoreLib, allow generating all intrinsics. The generated code might + // not actually be supported by the processor at runtime so we compensate for it by + // not letting the get_IsSupported method be intrinsically expanded in crossgen + // (see special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo). + // That way the actual support checks will always be jitted. + // We only do this for CoreLib because forgetting to wrap intrinsics under IsSupported + // checks can lead to illegal instruction traps (instead of a nice managed exception). 
if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib()) { m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD); + +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_FMA); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); +#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) } #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)