Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow constant propagation of Vector.Zero. #65028

Merged
merged 96 commits into dotnet:main from vec-zero-vn
Feb 18, 2022
Merged

Conversation

TIHan
Copy link
Contributor

@TIHan TIHan commented Feb 8, 2022

We need to merge #63821 first.

I did this work to gain some understanding of how value numbering and constant propagation work in the JIT.

Description

Example code:

    [MethodImpl(MethodImplOptions.NoInlining)]
    static Vector128<float> AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128<float> left)
    {
        var asVar = Vector128.Create(0f, 0f, 0f, 0f);
        return AdvSimd.CompareEqual(left, asVar);
    }

With constant propagation of the zero vector, the JIT now compiles this as if it had been written:

    [MethodImpl(MethodImplOptions.NoInlining)]
    static Vector128<float> AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128<float> left)
    {
        return AdvSimd.CompareEqual(left, Vector128<float>.Zero);
    }

ARM64 diffs

 G_M36659_IG02:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
-            movi    v16.4s, #0x00
-            fcmeq   v16.4s, v0.4s, v16.4s
+            fcmeq   v16.4s, v0.4s, #0
             mov     v0.16b, v16.16b
-						;; bbWeight=1    PerfScore 2.00
+						;; bbWeight=1    PerfScore 1.50
 G_M303_IG02:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
-            movi    v16.4s, #0x00
             mov     w0, wzr
-						;; bbWeight=1    PerfScore 1.00
+						;; bbWeight=1    PerfScore 0.50
 G_M303_IG03:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz, align
-            fcmeq   v17.4s, v0.4s, v16.4s
+            fcmeq   v16.4s, v0.4s, #0
             mov     w1, wzr
             align   [4 bytes for IG04]
             align   [4 bytes]
+            align   [4 bytes]
             align   [0 bytes]
-            align   [0 bytes]
-						;; bbWeight=4    PerfScore 10.00
+						;; bbWeight=4    PerfScore 12.00
 G_M303_IG04:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, loop=IG04, byref, isz
-            fcmeq   v17.4s, v0.4s, v16.4s
+            fcmeq   v16.4s, v0.4s, #0
             add     w1, w1, #1
             cmp     w1, #4
             blt     G_M303_IG04
@@ -43,14 +42,14 @@ G_M303_IG05:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
             blt     G_M303_IG03
 						;; bbWeight=4    PerfScore 8.00
 G_M303_IG06:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
-            mov     v0.16b, v17.16b
+            mov     v0.16b, v16.16b
 						;; bbWeight=1    PerfScore 0.50
 G_M303_IG07:        ; , epilog, nogc, extend
             ldp     fp, lr, [sp],#16
             ret     lr
 						;; bbWeight=1    PerfScore 2.00
 G_M7175_IG02:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
-            movi    v16.2d, #0x00
-            movi    v17.2d, #0x00
             mov     w0, wzr
-						;; bbWeight=1    PerfScore 1.50
+						;; bbWeight=1    PerfScore 0.50
 G_M7175_IG03:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz, align
-            cmeq    v18.2d, v0.2d, v17.2d
+            cmeq    v16.2d, v0.2d, #0
             mov     w1, wzr
             align   [4 bytes for IG04]
+            align   [4 bytes]
+            align   [4 bytes]
             align   [0 bytes]
-            align   [0 bytes]
-            align   [0 bytes]
-						;; bbWeight=4    PerfScore 8.00
+						;; bbWeight=4    PerfScore 12.00
 G_M7175_IG04:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, loop=IG04, byref, isz
-            cmeq    v18.2d, v0.2d, v16.2d
+            cmeq    v16.2d, v0.2d, #0
             add     w1, w1, #1
             cmp     w1, #4
             blt     G_M7175_IG04
@@ -45,14 +43,14 @@ G_M7175_IG05:        ; gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
             blt     G_M7175_IG03

Here are some notable x64 diffs:

 G_M3641_IG06:        ; gcrefRegs=00000008 {rbx}, byrefRegs=00000000 {}, byref
        vxorps   xmm6, xmm6, xmm6
        vxorps   xmm7, xmm7, xmm7
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
        call     hackishModuleName:hackishMethodName()
        ; gcr arg pop 0
        call     hackishModuleName:hackishMethodName()
@@ -443,8 +430,6 @@ G_M3641_IG06:        ; gcrefRegs=00000008 {rbx}, byrefRegs=00000000 {}, byref
        ; gcrRegs +[rax]
        ; gcr arg pop 0
        vmovupd  xmmword ptr [rax+8], xmm6
-G_M3641_IG08:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref, isz
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       jmp      SHORT G_M3641_IG10
-                        ;; bbWeight=0.50 PerfScore 2.17
-G_M3641_IG09:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
+       je       SHORT G_M3641_IG09
+                        ;; bbWeight=1    PerfScore 4.00
+G_M3641_IG08:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
        mov      esi, -1
                         ;; bbWeight=0.50 PerfScore 0.12
-G_M3641_IG10:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
+G_M3641_IG09:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
        mov      eax, esi
        call     CORINFO_HELP_ASSIGN_REF
        ; gcrRegs -[rdx rdi]
        ; byrRegs -[rcx]
-       vxorps   ymm0, ymm0, ymm0
        vmovupd  ymm6, ymmword ptr[reloc @RWD32]
-       vxorps   ymm0, ymm0, ymm0
        mov      rcx, 0xD1FFAB1E
        vextractf128 xmm7, ymm6, 1
        call     CORINFO_HELP_NEWSFAST
@@ -518,10 +516,10 @@ G_M50494_IG03:        ; , extend
        vmovupd  ymm0, ymmword ptr[reloc @RWD352]
        vmovupd  ymm1, ymmword ptr[reloc @RWD384]
        vmovupd  ymm2, ymmword ptr[reloc @RWD416]

Acceptance Criteria

Copy link
Member

@jakobbotsch jakobbotsch left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with a few nits.

@TIHan
Copy link
Contributor Author

TIHan commented Feb 18, 2022

Here are some notable x64 diffs:

 G_M3641_IG06:        ; gcrefRegs=00000008 {rbx}, byrefRegs=00000000 {}, byref
        vxorps   xmm6, xmm6, xmm6
        vxorps   xmm7, xmm7, xmm7
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
        call     hackishModuleName:hackishMethodName()
        ; gcr arg pop 0
        call     hackishModuleName:hackishMethodName()
@@ -443,8 +430,6 @@ G_M3641_IG06:        ; gcrefRegs=00000008 {rbx}, byrefRegs=00000000 {}, byref
        ; gcrRegs +[rax]
        ; gcr arg pop 0
        vmovupd  xmmword ptr [rax+8], xmm6
-G_M3641_IG08:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref, isz
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       vxorps   xmm0, xmm0, xmm0
-       jmp      SHORT G_M3641_IG10
-                        ;; bbWeight=0.50 PerfScore 2.17
-G_M3641_IG09:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
+       je       SHORT G_M3641_IG09
+                        ;; bbWeight=1    PerfScore 4.00
+G_M3641_IG08:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
        mov      esi, -1
                         ;; bbWeight=0.50 PerfScore 0.12
-G_M3641_IG10:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
+G_M3641_IG09:        ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
        mov      eax, esi
        call     CORINFO_HELP_ASSIGN_REF
        ; gcrRegs -[rdx rdi]
        ; byrRegs -[rcx]
-       vxorps   ymm0, ymm0, ymm0
        vmovupd  ymm6, ymmword ptr[reloc @RWD32]
-       vxorps   ymm0, ymm0, ymm0
        mov      rcx, 0xD1FFAB1E
        vextractf128 xmm7, ymm6, 1
        call     CORINFO_HELP_NEWSFAST
@@ -518,10 +516,10 @@ G_M50494_IG03:        ; , extend
        vmovupd  ymm0, ymmword ptr[reloc @RWD352]
        vmovupd  ymm1, ymmword ptr[reloc @RWD384]
        vmovupd  ymm2, ymmword ptr[reloc @RWD416]

@TIHan TIHan merged commit bdfff40 into dotnet:main Feb 18, 2022
@TIHan TIHan deleted the vec-zero-vn branch February 18, 2022 19:51
@ghost ghost locked as resolved and limited conversation to collaborators Mar 21, 2022
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-CodeGen-coreclr — CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants