-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Inconsistent and counterintuitive codegen behavior of the `+` operator for `Vector512<T>` depending on the number and order of operands generated by compare instructions, unlike for `Vector256<T>`
#92261
Comments
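Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
Issue Details
Description
When I was playing with the `Vector512<T>`, I found some inconsistent behavior of the `+` operator for `Vector512<byte>`, depending on whether one or more arguments are generated by `Vector512.Equals` or not. Both `.NET 7` and `.NET 8 RC 1` are doing the right thing for `Vector256<byte>`:
[MethodImpl(MethodImplOptions.AggressiveOptimization)]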
private static Vector256<byte> Test1(Vector256<byte> ymm0, Vector256<byte> ymm1, Vector256<byte> ymm2, Vector256<byte> ymm3)
=> Avx2.BlendVariable(ymm0, ymm2, Vector256.Equals(ymm0, ymm1) + Vector256.Equals(ymm2, ymm3)); In vzeroupper
mov rax,qword ptr [rsp+28h]
vmovupd ymm0,ymmword ptr [rdx]
vmovupd ymm1,ymmword ptr [r9]
vpcmpeqb ymm2,ymm0,ymmword ptr [r8]
vpcmpeqb ymm3,ymm1,ymmword ptr [rax]
vpaddb ymm2,ymm2,ymm3
vpblendvb ymm0,ymm0,ymm1,ymm2
vmovupd ymmword ptr [rcx],ymm0
mov rax,rcx
vzeroupper
ret In vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups ymm0,ymmword ptr [rdx]
vmovups ymm1,ymmword ptr [r9]
vpcmpeqb ymm2,ymm0,ymmword ptr [r8]
vpcmpeqb ymm3,ymm1,ymmword ptr [rax]
vpaddb ymm2,ymm2,ymm3
vpblendvb ymm0,ymm0,ymm1,ymm2
vmovups ymmword ptr [rcx],ymm0
mov rax,rcx
vzeroupper
ret But when it comes to [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512.Equals(zmm2, zmm3)); In vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpcmpeqb k2,zmm1,zmmword ptr [rax]
kaddq k1,k1,k2 ; WRONG INSTRUCTION!
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret While what I expect it to generate looks like: vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpmovm2b zmm2,k1
vpcmpeqb k1,zmm1,zmmword ptr [rax]
vpmovm2b zmm3,k1
vpaddb zmm2,zmm2,zmm3
vpmovb2m k1,zmm2
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret And even weirder, if I add [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend3(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512<byte>.Zero + Vector512.Equals(zmm2, zmm3)); It generates what I expect it to generate from vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpmovm2b zmm2,k1
vpcmpeqb k1,zmm1,zmmword ptr [rax]
vpmovm2b zmm3,k1
vpaddb zmm2,zmm2,zmm3
vpmovb2m k1,zmm2
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret Adding the Reproduction Steps
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector256<byte> Test1(Vector256<byte> ymm0, Vector256<byte> ymm1, Vector256<byte> ymm2, Vector256<byte> ymm3)
=> Avx2.BlendVariable(ymm0, ymm2, Vector256.Equals(ymm0, ymm1) + Vector256.Equals(ymm2, ymm3)); [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512.Equals(zmm2, zmm3)); [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend3(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512<byte>.Zero + Vector512.Equals(zmm2, zmm3)); Expected behaviorThe code: [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512.Equals(zmm2, zmm3)); and [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend3(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512<byte>.Zero + Vector512.Equals(zmm2, zmm3)); For both functions, it should generate something like: vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpmovm2b zmm2,k1
vpcmpeqb k1,zmm1,zmmword ptr [rax]
vpmovm2b zmm3,k1
vpaddb zmm2,zmm2,zmm3
vpmovb2m k1,zmm2
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret Actual behaviorFor the code: [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512.Equals(zmm2, zmm3)); It instead generates something like: vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpcmpeqb k2,zmm1,zmmword ptr [rax]
kaddq k1,k1,k2
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret Regression?Unknown Known WorkaroundsBy adding [MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static Vector512<byte> Test512Blend3(Vector512<byte> zmm0, Vector512<byte> zmm1, Vector512<byte> zmm2, Vector512<byte> zmm3)
=> Avx512BW.BlendVariable(zmm0, zmm2, Vector512.Equals(zmm0, zmm1) + Vector512<byte>.Zero + Vector512.Equals(zmm2, zmm3)); It generates: vzeroupper
mov rax,qword ptr [rsp+28h]
vmovups zmm0,zmmword ptr [rdx]
vmovups zmm1,zmmword ptr [r9]
vpcmpeqb k1,zmm0,zmmword ptr [r8]
vpmovm2b zmm2,k1
vpcmpeqb k1,zmm1,zmmword ptr [rax]
vpmovm2b zmm3,k1
vpaddb zmm2,zmm2,zmm3
vpmovb2m k1,zmm2
vpblendmb zmm0{k1},zmm0,zmm1
vmovups zmmword ptr [rcx],zmm0
mov rax,rcx
vzeroupper
ret Configuration
Visual Studio 2022 installationMicrosoft Visual Studio Community 2022 インストールされているバージョン:Community Visual C++ 2022 00482-90000-00000-AA248 ASP.NET and Web Tools 17.8.226.21692 AvaloniaPackage Extension 1.0 Azure App Service Tools v3.0.0 17.8.226.21692 Azure Functions and Web Jobs Tools 17.8.226.21692 C# ツール 4.8.0-2.23429.7+44555193fd1135b5d53a2099f76fec91e0d1ebde Code Cleanup On Save 1.0.12 Common Azure Tools 1.10 Extensibility Message Bus 1.4.39 (main@e8108eb) File Icons 2.7 Microsoft JVM Debugger 1.0 Mono Debugging for Visual Studio 17.8.14 (0c9914e) NuGet パッケージ マネージャー 6.8.0 Razor (ASP.NET Core) 17.8.2.2345506+ade90399d42c1a7bf92191b1c067816c0ae1c311 SonarLint for Visual Studio 7.3.0.77872 SQL Server Data Tools 17.8.64.0 Tweaks 2022 1.1.143 TypeScript Tools 17.0.20830.2001 Visual Basic ツール 4.8.0-2.23429.7+44555193fd1135b5d53a2099f76fec91e0d1ebde Visual F# Tools 17.8.0-beta.23425.10+0d3549fa5b8b6387ade191d76768405cefed8229 Visual Studio IntelliCode 2.2 VisualStudio.DeviceLog 1.0 VisualStudio.Mac 1.0 Xamarin 17.8.0.118 (main@35c256f) Xamarin Designer 17.8.1.11 (remotes/origin/d17-8@13ef934098) Xamarin Templates 17.8.16 (830b56a) Xamarin.Android SDK 13.2.1.2 (d17-5/a8a26c7) Xamarin.iOS and Xamarin.Mac SDK 16.4.0.16 (b5972410d) Other informationNo response
|
cc @dotnet/avx512-contrib |
Looks like an issue in the kmask logic here. The simplest thing is to avoid generating |
Description
When I was playing with the `Vector512<T>`, I found some inconsistent behavior of the `+` operator for `Vector512<byte>`, depending on whether one or more arguments are generated by `Vector512.Equals` or not.
Both `.NET 7` and `.NET 8 RC 1` are doing the right thing for `Vector256<byte>`:
In `.NET 7`, it generates:
In `.NET 8 RC 1`, it generates:
But when it comes to `Vector512<byte>`, it somehow goes wrong.
In `.NET 8 RC 1`, it generates:
While what I expect it to generate looks like:
And even weirder, if I add `Vector512<byte>.Zero` in advance of adding the second operand, like:
It generates what I expect it to generate from `Test512Blend`:
Adding the `Vector512<T>.Zero` MUST NOT alter the result of the whole integer `+` operation.
Reproduction Steps
`.NET 8 RC 1`:
Expected behavior
The code:
and
For both functions, it should generate something like:
Actual behavior
For the code:
It instead generates something like:
Regression?
Unknown
Known Workarounds
By adding `Vector512<T>.Zero` in advance of adding the second operand, like:
It generates:
Configuration
8.0.100-rc.1.23455.8
Visual Studio 2022 installation
Microsoft Visual Studio Community 2022
Version 17.8.0 Preview 2.0
VisualStudio.17.Preview/17.8.0-pre.2.0+34112.27
Microsoft .NET Framework
Version 4.8.09032
インストールされているバージョン:Community
Visual C++ 2022 00482-90000-00000-AA248
Microsoft Visual C++ 2022
ASP.NET and Web Tools 17.8.226.21692
ASP.NET and Web Tools
AvaloniaPackage Extension 1.0
AvaloniaPackage Visual Studio Extension Detailed Info
Azure App Service Tools v3.0.0 17.8.226.21692
Azure App Service Tools v3.0.0
Azure Functions and Web Jobs Tools 17.8.226.21692
Azure Functions and Web Jobs Tools
C# ツール 4.8.0-2.23429.7+44555193fd1135b5d53a2099f76fec91e0d1ebde
IDE で使用する C# コンポーネント。プロジェクトの種類や設定に応じて、異なるバージョンのコンパイラを使用できます。
Code Cleanup On Save 1.0.12
Automatically run one of the Code Clean profiles when saving the document. This ensures your code is always formatted correctly and follows your coding style conventions.
Common Azure Tools 1.10
Provides common services for use by Azure Mobile Services and Microsoft Azure Tools.
Extensibility Message Bus 1.4.39 (main@e8108eb)
Provides common messaging-based MEF services for loosely coupled Visual Studio extension components communication and integration.
File Icons 2.7
Adds icons for files that are not recognized by Solution Explorer
Microsoft JVM Debugger 1.0
Provides support for connecting the Visual Studio debugger to JDWP compatible Java Virtual Machines
Mono Debugging for Visual Studio 17.8.14 (0c9914e)
Support for debugging Mono processes with Visual Studio.
NuGet パッケージ マネージャー 6.8.0
Visual Studio 内の NuGet パッケージ マネージャー。NuGet の詳細については、https://docs.nuget.org/ にアクセスしてください
Razor (ASP.NET Core) 17.8.2.2345506+ade90399d42c1a7bf92191b1c067816c0ae1c311
ASP.NET Core Razor の言語サービスを提供します。
SonarLint for Visual Studio 7.3.0.77872
SonarLint is an extension to your favorite IDE that provides on-the-fly feedback to developers on new bugs and quality issues injected into their code.
SQL Server Data Tools 17.8.64.0
Microsoft SQL Server Data Tools
Tweaks 2022 1.1.143
A collection of minor fixes and tweaks for Visual Studio to reduce the paper cuts and make you a happier developer
TypeScript Tools 17.0.20830.2001
TypeScript Tools for Microsoft Visual Studio
Visual Basic ツール 4.8.0-2.23429.7+44555193fd1135b5d53a2099f76fec91e0d1ebde
IDE で使用する Visual Basic コンポーネント。プロジェクトの種類や設定に応じて、異なるバージョンのコンパイラを使用できます。
Visual F# Tools 17.8.0-beta.23425.10+0d3549fa5b8b6387ade191d76768405cefed8229
Microsoft Visual F# Tools
Visual Studio IntelliCode 2.2
Visual Studio 向けの AI 支援付き開発。
VisualStudio.DeviceLog 1.0
パッケージに関する情報
VisualStudio.Mac 1.0
Mac Extension for Visual Studio
Xamarin 17.8.0.118 (main@35c256f)
Xamarin.iOS と Xamarin.Android の開発を有効にする Visual Studio 拡張機能
Xamarin Designer 17.8.1.11 (remotes/origin/d17-8@13ef934098)
Visual Studio で Xamarin Designer ツールを有効にするための Visual Studio 拡張機能。
Xamarin Templates 17.8.16 (830b56a)
Templates for building iOS, Android, and Windows apps with Xamarin and Xamarin.Forms.
Xamarin.Android SDK 13.2.1.2 (d17-5/a8a26c7)
Xamarin.Android Reference Assemblies and MSBuild support.
Mono: d9a6e87
Java.Interop: xamarin/java.interop/d17-5@149d70fe
SQLite: xamarin/sqlite@68c69d8
Xamarin.Android Tools: xamarin/xamarin-android-tools/d17-5@ca1552d
Xamarin.iOS and Xamarin.Mac SDK 16.4.0.16 (b5972410d)
Xamarin.iOS and Xamarin.Mac Reference Assemblies and MSBuild support.
Other information
No response
The text was updated successfully, but these errors were encountered: