diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index f0e7fc6322f2b..dad1df9ffcfb7 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -7652,15 +7652,17 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre } case NI_SSE2_ConvertToVector128Double: - case NI_SSE3_MoveAndDuplicate: case NI_AVX_ConvertToVector256Double: + case NI_AVX512F_ConvertToVector512Double: + case NI_AVX512F_VL_ConvertToVector128Double: + case NI_AVX512F_VL_ConvertToVector256Double: { assert(!supportsSIMDScalarLoads); // Most instructions under the non-VEX encoding require aligned operands. // Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD) - // and Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't fail for - // unaligned inputs as they read mem64 (half the vector width) instead + // are exceptions and don't fail for unaligned inputs as they read half + // the vector width instead supportsAlignedSIMDLoads = !comp->opts.MinOpts(); supportsUnalignedSIMDLoads = true; @@ -7668,10 +7670,29 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2; const unsigned operandSize = genTypeSize(childNode->TypeGet()); - // For broadcasts we can only optimize constants and memory operands - const bool broadcastIsContainable = childNode->OperIsConst() || childNode->isMemoryOp(); - supportsGeneralLoads = - broadcastIsContainable && supportsUnalignedSIMDLoads && (operandSize >= expectedSize); + if (childNode->OperIsConst() || childNode->isMemoryOp()) + { + // For broadcasts we can only optimize constants and memory operands + // since we're going from a smaller base type to a larger base type + supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize); + } + break; + } + + case NI_SSE3_MoveAndDuplicate: + { + // Most instructions under the non-VEX encoding require aligned 
operands. + // Those used for Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't + // fail for unaligned inputs as they read half the vector width instead + + supportsAlignedSIMDLoads = !comp->opts.MinOpts(); + supportsUnalignedSIMDLoads = true; + + const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2; + const unsigned operandSize = genTypeSize(childNode->TypeGet()); + + supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize); + supportsSIMDScalarLoads = true; break; } @@ -7697,8 +7718,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre break; } } - - assert(supportsSIMDScalarLoads == false); break; } diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.cs b/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.cs new file mode 100644 index 0000000000000..0725a973faedf --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.cs @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+public static class Runtime_100404 // JIT regression test shipped with the lowerxarch.cpp change in this patch
+{
+    [Fact]
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static void TestMultiplyVector128DoubleByConstant()
+    {
+        Vector128<double> result = Map(Vector128<double>.One, new FloatPoint(2.0, 3.0)); // all-ones vector * broadcast(X = 2.0)
+        Assert.Equal(2.0, result[0]);
+        Assert.Equal(2.0, result[1]); // both lanes must hold X; Y (3.0) must never leak into the result
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)] // NOTE(review): presumably keeps the field broadcast compiled inside Map itself - confirm against the linked issue
+    private static Vector128<double> Map(Vector128<double> m0, FloatPoint point)
+    {
+        return m0 * Vector128.Create(point.X); // broadcast point.X to every lane, then multiply element-wise with m0
+    }
+
+    private struct FloatPoint(double x, double y) // two-double value type; Map reads only X
+    {
+        public double X = x;
+        public double Y = y;
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.csproj
new file mode 100644
index 0000000000000..15edd99711a1a
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.csproj
@@ -0,0 +1,8 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
\ No newline at end of file