forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X64] [khushal1996] AVX10.1 API introduction in JIT #403
Comments
Top method regressions
1373 (3,348.78 % of base) - System.Numerics.Tensors.TensorPrimitives+ConvertSingleToUInt32:Invoke(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint]
; Assembly listing for method System.Numerics.Tensors.TensorPrimitives+ConvertSingleToUInt32:Invoke(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)
-; Emitting BLENDED_CODE for X64 with AVX512 - Unix
+; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
-; rsp based frame
+; rbp based frame
; partially interruptible
; No PGO data
+; 0 inlinees with PGO data; 41 single block inlinees; 19 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T01] ( 2, 2 ) simd64 -> mm0 single-def <System.Runtime.Intrinsics.Vector512`1[float]>
+; V00 RetBuf [V00,T00] ( 5, 5 ) byref -> rdi single-def
+;* V01 arg0 [V01 ] ( 0, 0 ) struct (64) zero-ref single-def <System.Runtime.Intrinsics.Vector512`1[float]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+; V03 tmp1 [V03,T71] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
+; V04 tmp2 [V04,T72] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
+; V05 tmp3 [V05,T33] ( 3, 6 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[float]>
+; V06 tmp4 [V06,T73] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
+;* V07 tmp5 [V07 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V08 tmp6 [V08,T34] ( 3, 6 ) simd16 -> [rbp-0x10] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V09 tmp7 [V09 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V10 tmp8 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V11 tmp9 [V11 ] ( 3, 3 ) struct ( 8) [rbp-0x18] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V12 tmp10 [V12,T09] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V13 tmp11 [V13 ] ( 3, 6 ) struct ( 8) [rbp-0x20] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V14 tmp12 [V14 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V15 tmp13 [V15 ] ( 3, 3 ) struct ( 8) [rbp-0x28] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V16 tmp14 [V16,T10] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V17 tmp15 [V17 ] ( 3, 6 ) struct ( 8) [rbp-0x30] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V18 tmp16 [V18 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V19 tmp17 [V19,T75] ( 3, 3 ) simd16 -> [rbp-0x40] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+; V20 tmp18 [V20,T35] ( 3, 6 ) simd16 -> [rbp-0x50] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V21 tmp19 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V22 tmp20 [V22 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V23 tmp21 [V23 ] ( 3, 3 ) struct ( 8) [rbp-0x58] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V24 tmp22 [V24,T11] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V25 tmp23 [V25 ] ( 3, 6 ) struct ( 8) [rbp-0x60] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V26 tmp24 [V26 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V27 tmp25 [V27 ] ( 3, 3 ) struct ( 8) [rbp-0x68] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V28 tmp26 [V28,T12] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V29 tmp27 [V29 ] ( 3, 6 ) struct ( 8) [rbp-0x70] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V30 tmp28 [V30 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V31 tmp29 [V31,T76] ( 3, 3 ) simd16 -> [rbp-0x80] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V32 tmp30 [V32 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[uint]>
+; V33 tmp31 [V33,T36] ( 3, 6 ) simd32 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[float]>
+; V34 tmp32 [V34,T74] ( 2, 4 ) simd16 -> mm2 "impAppendStmt"
+;* V35 tmp33 [V35 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V36 tmp34 [V36,T37] ( 3, 6 ) simd16 -> [rbp-0x90] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V37 tmp35 [V37 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V38 tmp36 [V38 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V39 tmp37 [V39 ] ( 3, 3 ) struct ( 8) [rbp-0x98] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V40 tmp38 [V40,T13] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V41 tmp39 [V41 ] ( 3, 6 ) struct ( 8) [rbp-0xA0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V42 tmp40 [V42 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V43 tmp41 [V43 ] ( 3, 3 ) struct ( 8) [rbp-0xA8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V44 tmp42 [V44,T14] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V45 tmp43 [V45 ] ( 3, 6 ) struct ( 8) [rbp-0xB0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V46 tmp44 [V46 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V47 tmp45 [V47,T77] ( 3, 3 ) simd16 -> [rbp-0xC0] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+; V48 tmp46 [V48,T38] ( 3, 6 ) simd16 -> [rbp-0xD0] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V49 tmp47 [V49 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V50 tmp48 [V50 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V51 tmp49 [V51 ] ( 3, 3 ) struct ( 8) [rbp-0xD8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V52 tmp50 [V52,T15] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V53 tmp51 [V53 ] ( 3, 6 ) struct ( 8) [rbp-0xE0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V54 tmp52 [V54 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V55 tmp53 [V55 ] ( 3, 3 ) struct ( 8) [rbp-0xE8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V56 tmp54 [V56,T16] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V57 tmp55 [V57 ] ( 3, 6 ) struct ( 8) [rbp-0xF0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V58 tmp56 [V58 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V59 tmp57 [V59,T78] ( 3, 3 ) simd16 -> [rbp-0x100] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V60 tmp58 [V60 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[uint]>
+;* V61 tmp59 [V61 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[uint]>
+; V62 tmp60 [V62,T81] ( 1, 1 ) simd32 -> [rbp+0x10] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+; V63 tmp61 [V63,T82] ( 1, 1 ) simd32 -> [rbp+0x30] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+; V64 tmp62 [V64,T01] ( 2, 2 ) long -> rax "field V09._00 (fldOffset=0x0)" P-INDEP
+; V65 tmp63 [V65,T02] ( 2, 2 ) long -> rcx "field V10._00 (fldOffset=0x0)" P-INDEP
+; V66 tmp64 [V66 ] ( 3, 3 ) long -> [rbp-0x18] do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+; V67 tmp65 [V67 ] ( 3, 5 ) long -> [rbp-0x20] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+; V68 tmp66 [V68 ] ( 3, 3 ) long -> [rbp-0x28] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+; V69 tmp67 [V69 ] ( 3, 5 ) long -> [rbp-0x30] do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+; V70 tmp68 [V70,T03] ( 2, 2 ) long -> rax "field V21._00 (fldOffset=0x0)" P-INDEP
+; V71 tmp69 [V71,T04] ( 2, 2 ) long -> rcx "field V22._00 (fldOffset=0x0)" P-INDEP
+; V72 tmp70 [V72 ] ( 3, 3 ) long -> [rbp-0x58] do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+; V73 tmp71 [V73 ] ( 3, 5 ) long -> [rbp-0x60] do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+; V74 tmp72 [V74 ] ( 3, 3 ) long -> [rbp-0x68] do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+; V75 tmp73 [V75 ] ( 3, 5 ) long -> [rbp-0x70] do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+; V76 tmp74 [V76,T05] ( 2, 2 ) long -> rax "field V37._00 (fldOffset=0x0)" P-INDEP
+; V77 tmp75 [V77,T06] ( 2, 2 ) long -> rcx "field V38._00 (fldOffset=0x0)" P-INDEP
+; V78 tmp76 [V78 ] ( 3, 3 ) long -> [rbp-0x98] do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
+; V79 tmp77 [V79 ] ( 3, 5 ) long -> [rbp-0xA0] do-not-enreg[X] addr-exposed "field V41._00 (fldOffset=0x0)" P-DEP
+; V80 tmp78 [V80 ] ( 3, 3 ) long -> [rbp-0xA8] do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+; V81 tmp79 [V81 ] ( 3, 5 ) long -> [rbp-0xB0] do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+; V82 tmp80 [V82,T07] ( 2, 2 ) long -> rax "field V49._00 (fldOffset=0x0)" P-INDEP
+; V83 tmp81 [V83,T08] ( 2, 2 ) long -> rcx "field V50._00 (fldOffset=0x0)" P-INDEP
+; V84 tmp82 [V84 ] ( 3, 3 ) long -> [rbp-0xD8] do-not-enreg[X] addr-exposed "field V51._00 (fldOffset=0x0)" P-DEP
+; V85 tmp83 [V85 ] ( 3, 5 ) long -> [rbp-0xE0] do-not-enreg[X] addr-exposed "field V53._00 (fldOffset=0x0)" P-DEP
+; V86 tmp84 [V86 ] ( 3, 3 ) long -> [rbp-0xE8] do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
+; V87 tmp85 [V87 ] ( 3, 5 ) long -> [rbp-0xF0] do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+; V88 tmp86 [V88,T79] ( 2, 2 ) simd32 -> mm0 "field V61._lower (fldOffset=0x0)" P-INDEP
+; V89 tmp87 [V89,T80] ( 2, 2 ) simd32 -> mm1 "field V61._upper (fldOffset=0x20)" P-INDEP
+; V90 rat0 [V90,T17] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V91 rat1 [V91,T39] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V92 rat2 [V92,T40] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V93 rat3 [V93,T18] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V94 rat4 [V94,T41] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V95 rat5 [V95,T42] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V96 rat6 [V96,T19] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V97 rat7 [V97,T43] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V98 rat8 [V98,T44] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V99 rat9 [V99,T20] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V100 rat10 [V100,T45] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V101 rat11 [V101,T46] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V102 rat12 [V102,T21] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V103 rat13 [V103,T47] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V104 rat14 [V104,T48] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V105 rat15 [V105,T22] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V106 rat16 [V106,T49] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V107 rat17 [V107,T50] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V108 rat18 [V108,T23] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V109 rat19 [V109,T51] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V110 rat20 [V110,T52] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V111 rat21 [V111,T24] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V112 rat22 [V112,T53] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V113 rat23 [V113,T54] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V114 rat24 [V114,T25] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V115 rat25 [V115,T55] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V116 rat26 [V116,T56] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V117 rat27 [V117,T26] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V118 rat28 [V118,T57] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V119 rat29 [V119,T58] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V120 rat30 [V120,T27] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V121 rat31 [V121,T59] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V122 rat32 [V122,T60] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V123 rat33 [V123,T28] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V124 rat34 [V124,T61] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V125 rat35 [V125,T62] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V126 rat36 [V126,T29] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V127 rat37 [V127,T63] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V128 rat38 [V128,T64] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V129 rat39 [V129,T30] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V130 rat40 [V130,T65] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V131 rat41 [V131,T66] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V132 rat42 [V132,T31] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V133 rat43 [V133,T67] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V134 rat44 [V134,T68] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V135 rat45 [V135,T32] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V136 rat46 [V136,T69] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V137 rat47 [V137,T70] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
;
-; Lcl frame size = 0
+; Lcl frame size = 256
G_M2517_IG01:
- vmovups zmm0, zmmword ptr [rsp+0x08]
- ;; size=11 bbWeight=1 PerfScore 3.00
+ push rbp
+ sub rsp, 256
+ lea rbp, [rsp+0x100]
+ ;; size=16 bbWeight=1 PerfScore 1.75
G_M2517_IG02:
- vfixupimmps zmm0, zmm0, zmmword ptr [reloc @RWD00], 0
- vcvttps2udq zmm0, zmm0
- vmovups zmmword ptr [rdi], zmm0
- mov rax, rdi
- ;; size=26 bbWeight=1 PerfScore 10.25
+ vmovups ymm0, ymmword ptr [rbp+0x10]
+ vmovaps ymm1, ymm0
+ vmovaps xmmword ptr [rbp-0x10], xmm1
+ mov rax, qword ptr [rbp-0x10]
+ mov qword ptr [rbp-0x20], rax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x20]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vxorps xmm5, xmm5, xmm5
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0x18], eax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x1C]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0x14], eax
+ mov rax, qword ptr [rbp-0x18]
+ mov rcx, qword ptr [rbp-0x08]
+ mov qword ptr [rbp-0x30], rcx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x30]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0x28], ecx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x2C]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0x24], ecx
+ mov rcx, qword ptr [rbp-0x28]
+ ;; size=306 bbWeight=1 PerfScore 134.25
G_M2517_IG03:
+ mov qword ptr [rbp-0x40], rax
+ mov qword ptr [rbp-0x38], rcx
+ vmovaps xmm1, xmmword ptr [rbp-0x40]
+ vextractf128 xmm0, ymm0, 1
+ vmovaps xmmword ptr [rbp-0x50], xmm0
+ mov rax, qword ptr [rbp-0x50]
+ mov qword ptr [rbp-0x60], rax
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x60]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rax, xmm0
+ mov dword ptr [rbp-0x58], eax
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x5C]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rax, xmm0
+ mov dword ptr [rbp-0x54], eax
+ mov rax, qword ptr [rbp-0x58]
+ mov rcx, qword ptr [rbp-0x48]
+ mov qword ptr [rbp-0x70], rcx
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x70]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rcx, xmm0
+ mov dword ptr [rbp-0x68], ecx
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x6C]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rcx, xmm0
+ mov dword ptr [rbp-0x64], ecx
+ ;; size=308 bbWeight=1 PerfScore 135.67
+G_M2517_IG04:
+ mov rcx, qword ptr [rbp-0x68]
+ mov qword ptr [rbp-0x80], rax
+ mov qword ptr [rbp-0x78], rcx
+ vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
+ vmovups ymm1, ymmword ptr [rbp+0x30]
+ vmovaps ymm2, ymm1
+ vmovaps xmmword ptr [rbp-0x90], xmm2
+ mov rax, qword ptr [rbp-0x90]
+ mov qword ptr [rbp-0xA0], rax
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xA0]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vxorps xmm6, xmm6, xmm6
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rax, xmm2
+ mov dword ptr [rbp-0x98], eax
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0x9C]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rax, xmm2
+ mov dword ptr [rbp-0x94], eax
+ mov rax, qword ptr [rbp-0x98]
+ mov rcx, qword ptr [rbp-0x88]
+ mov qword ptr [rbp-0xB0], rcx
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xB0]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rcx, xmm2
+ mov dword ptr [rbp-0xA8], ecx
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xAC]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ ;; size=341 bbWeight=1 PerfScore 129.25
+G_M2517_IG05:
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rcx, xmm2
+ mov dword ptr [rbp-0xA4], ecx
+ mov rcx, qword ptr [rbp-0xA8]
+ mov qword ptr [rbp-0xC0], rax
+ mov qword ptr [rbp-0xB8], rcx
+ vmovaps xmm2, xmmword ptr [rbp-0xC0]
+ vextractf128 xmm1, ymm1, 1
+ vmovaps xmmword ptr [rbp-0xD0], xmm1
+ mov rax, qword ptr [rbp-0xD0]
+ mov qword ptr [rbp-0xE0], rax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xE0]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0xD8], eax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xDC]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0xD4], eax
+ mov rax, qword ptr [rbp-0xD8]
+ mov rcx, qword ptr [rbp-0xC8]
+ mov qword ptr [rbp-0xF0], rcx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xF0]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0xE8], ecx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xEC]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ ;; size=362 bbWeight=1 PerfScore 136.33
+G_M2517_IG06:
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0xE4], ecx
+ mov rcx, qword ptr [rbp-0xE8]
+ mov qword ptr [rbp-0x100], rax
+ mov qword ptr [rbp-0xF8], rcx
+ vinserti128 ymm1, ymm2, xmmword ptr [rbp-0x100], 1
+ vmovups ymmword ptr [rdi], ymm0
+ vmovups ymmword ptr [rdi+0x20], ymm1
+ mov rax, rdi
+ ;; size=69 bbWeight=1 PerfScore 22.58
+G_M2517_IG07:
vzeroupper
+ add rsp, 256
+ pop rbp
ret
- ;; size=4 bbWeight=1 PerfScore 2.00
-RWD00 dq 0808008808080088h, 0808008808080088h, 0808008808080088h, 0808008808080088h, 0808008808080088h, 0808008808080088h, 0808008808080088h, 0808008808080088h
+ ;; size=12 bbWeight=1 PerfScore 2.75
+RWD00 dq 41EFFFFFFFE00000h, 41EFFFFFFFE00000h
-; Total bytes of code 41, prolog size 0, PerfScore 15.25, instruction count 7, allocated bytes for code 41 (MethodHash=4cb1f62a) for method System.Numerics.Tensors.TensorPrimitives+ConvertSingleToUInt32:Invoke(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)
+; Total bytes of code 1414, prolog size 16, PerfScore 562.58, instruction count 282, allocated bytes for code 1414 (MethodHash=4cb1f62a) for method System.Numerics.Tensors.TensorPrimitives+ConvertSingleToUInt32:Invoke(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)
1339 (1,785.33 % of base) - System.Runtime.Intrinsics.Vector512:ConvertToUInt32Native(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint]
; Assembly listing for method System.Runtime.Intrinsics.Vector512:ConvertToUInt32Native(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)
-; Emitting BLENDED_CODE for X64 with AVX512 - Unix
+; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
-; rsp based frame
+; rbp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 40 single block inlinees; 19 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T03] ( 2, 2 ) simd64 -> mm0 single-def <System.Runtime.Intrinsics.Vector512`1[float]>
+; V00 RetBuf [V00,T00] ( 5, 5 ) byref -> rdi single-def
+;* V01 arg0 [V01 ] ( 0, 0 ) struct (64) zero-ref single-def <System.Runtime.Intrinsics.Vector512`1[float]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V03 tmp1 [V03,T01] ( 2, 4 ) simd32 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[uint]>
-; V04 tmp2 [V04,T02] ( 2, 4 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[uint]>
-; V05 tmp3 [V05,T04] ( 1, 1 ) simd64 -> [rsp+0x00] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[uint]>
+; V03 tmp1 [V03,T71] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
+; V04 tmp2 [V04,T72] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
+; V05 tmp3 [V05,T33] ( 3, 6 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[float]>
+; V06 tmp4 [V06,T73] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
+;* V07 tmp5 [V07 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V08 tmp6 [V08,T34] ( 3, 6 ) simd16 -> [rbp-0x10] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V09 tmp7 [V09 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V10 tmp8 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V11 tmp9 [V11 ] ( 3, 3 ) struct ( 8) [rbp-0x18] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V12 tmp10 [V12,T09] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V13 tmp11 [V13 ] ( 3, 6 ) struct ( 8) [rbp-0x20] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V14 tmp12 [V14 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V15 tmp13 [V15 ] ( 3, 3 ) struct ( 8) [rbp-0x28] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V16 tmp14 [V16,T10] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V17 tmp15 [V17 ] ( 3, 6 ) struct ( 8) [rbp-0x30] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V18 tmp16 [V18 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V19 tmp17 [V19,T75] ( 3, 3 ) simd16 -> [rbp-0x40] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+; V20 tmp18 [V20,T35] ( 3, 6 ) simd16 -> [rbp-0x50] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V21 tmp19 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V22 tmp20 [V22 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V23 tmp21 [V23 ] ( 3, 3 ) struct ( 8) [rbp-0x58] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V24 tmp22 [V24,T11] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V25 tmp23 [V25 ] ( 3, 6 ) struct ( 8) [rbp-0x60] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V26 tmp24 [V26 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V27 tmp25 [V27 ] ( 3, 3 ) struct ( 8) [rbp-0x68] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V28 tmp26 [V28,T12] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V29 tmp27 [V29 ] ( 3, 6 ) struct ( 8) [rbp-0x70] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V30 tmp28 [V30 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V31 tmp29 [V31,T76] ( 3, 3 ) simd16 -> [rbp-0x80] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V32 tmp30 [V32 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[uint]>
+; V33 tmp31 [V33,T36] ( 3, 6 ) simd32 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[float]>
+; V34 tmp32 [V34,T74] ( 2, 4 ) simd16 -> mm2 "impAppendStmt"
+;* V35 tmp33 [V35 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V36 tmp34 [V36,T37] ( 3, 6 ) simd16 -> [rbp-0x90] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V37 tmp35 [V37 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V38 tmp36 [V38 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V39 tmp37 [V39 ] ( 3, 3 ) struct ( 8) [rbp-0x98] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V40 tmp38 [V40,T13] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V41 tmp39 [V41 ] ( 3, 6 ) struct ( 8) [rbp-0xA0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V42 tmp40 [V42 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V43 tmp41 [V43 ] ( 3, 3 ) struct ( 8) [rbp-0xA8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V44 tmp42 [V44,T14] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V45 tmp43 [V45 ] ( 3, 6 ) struct ( 8) [rbp-0xB0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V46 tmp44 [V46 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V47 tmp45 [V47,T77] ( 3, 3 ) simd16 -> [rbp-0xC0] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+; V48 tmp46 [V48,T38] ( 3, 6 ) simd16 -> [rbp-0xD0] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[float]>
+;* V49 tmp47 [V49 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V50 tmp48 [V50 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[uint]>
+; V51 tmp49 [V51 ] ( 3, 3 ) struct ( 8) [rbp-0xD8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V52 tmp50 [V52,T15] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V53 tmp51 [V53 ] ( 3, 6 ) struct ( 8) [rbp-0xE0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V54 tmp52 [V54 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V55 tmp53 [V55 ] ( 3, 3 ) struct ( 8) [rbp-0xE8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[uint]>
+;* V56 tmp54 [V56,T16] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V57 tmp55 [V57 ] ( 3, 6 ) struct ( 8) [rbp-0xF0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[float]>
+;* V58 tmp56 [V58 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V59 tmp57 [V59,T78] ( 3, 3 ) simd16 -> [rbp-0x100] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V60 tmp58 [V60 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[uint]>
+;* V61 tmp59 [V61 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[uint]>
+; V62 tmp60 [V62,T81] ( 1, 1 ) simd32 -> [rbp+0x10] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+; V63 tmp61 [V63,T82] ( 1, 1 ) simd32 -> [rbp+0x30] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+; V64 tmp62 [V64,T01] ( 2, 2 ) long -> rax "field V09._00 (fldOffset=0x0)" P-INDEP
+; V65 tmp63 [V65,T02] ( 2, 2 ) long -> rcx "field V10._00 (fldOffset=0x0)" P-INDEP
+; V66 tmp64 [V66 ] ( 3, 3 ) long -> [rbp-0x18] do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+; V67 tmp65 [V67 ] ( 3, 5 ) long -> [rbp-0x20] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+; V68 tmp66 [V68 ] ( 3, 3 ) long -> [rbp-0x28] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+; V69 tmp67 [V69 ] ( 3, 5 ) long -> [rbp-0x30] do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+; V70 tmp68 [V70,T03] ( 2, 2 ) long -> rax "field V21._00 (fldOffset=0x0)" P-INDEP
+; V71 tmp69 [V71,T04] ( 2, 2 ) long -> rcx "field V22._00 (fldOffset=0x0)" P-INDEP
+; V72 tmp70 [V72 ] ( 3, 3 ) long -> [rbp-0x58] do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+; V73 tmp71 [V73 ] ( 3, 5 ) long -> [rbp-0x60] do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+; V74 tmp72 [V74 ] ( 3, 3 ) long -> [rbp-0x68] do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+; V75 tmp73 [V75 ] ( 3, 5 ) long -> [rbp-0x70] do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+; V76 tmp74 [V76,T05] ( 2, 2 ) long -> rax "field V37._00 (fldOffset=0x0)" P-INDEP
+; V77 tmp75 [V77,T06] ( 2, 2 ) long -> rcx "field V38._00 (fldOffset=0x0)" P-INDEP
+; V78 tmp76 [V78 ] ( 3, 3 ) long -> [rbp-0x98] do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
+; V79 tmp77 [V79 ] ( 3, 5 ) long -> [rbp-0xA0] do-not-enreg[X] addr-exposed "field V41._00 (fldOffset=0x0)" P-DEP
+; V80 tmp78 [V80 ] ( 3, 3 ) long -> [rbp-0xA8] do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+; V81 tmp79 [V81 ] ( 3, 5 ) long -> [rbp-0xB0] do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+; V82 tmp80 [V82,T07] ( 2, 2 ) long -> rax "field V49._00 (fldOffset=0x0)" P-INDEP
+; V83 tmp81 [V83,T08] ( 2, 2 ) long -> rcx "field V50._00 (fldOffset=0x0)" P-INDEP
+; V84 tmp82 [V84 ] ( 3, 3 ) long -> [rbp-0xD8] do-not-enreg[X] addr-exposed "field V51._00 (fldOffset=0x0)" P-DEP
+; V85 tmp83 [V85 ] ( 3, 5 ) long -> [rbp-0xE0] do-not-enreg[X] addr-exposed "field V53._00 (fldOffset=0x0)" P-DEP
+; V86 tmp84 [V86 ] ( 3, 3 ) long -> [rbp-0xE8] do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
+; V87 tmp85 [V87 ] ( 3, 5 ) long -> [rbp-0xF0] do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+; V88 tmp86 [V88,T79] ( 2, 2 ) simd32 -> mm0 "field V61._lower (fldOffset=0x0)" P-INDEP
+; V89 tmp87 [V89,T80] ( 2, 2 ) simd32 -> mm1 "field V61._upper (fldOffset=0x20)" P-INDEP
+; V90 rat0 [V90,T17] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V91 rat1 [V91,T39] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V92 rat2 [V92,T40] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V93 rat3 [V93,T18] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V94 rat4 [V94,T41] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V95 rat5 [V95,T42] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V96 rat6 [V96,T19] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V97 rat7 [V97,T43] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V98 rat8 [V98,T44] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V99 rat9 [V99,T20] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V100 rat10 [V100,T45] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V101 rat11 [V101,T46] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V102 rat12 [V102,T21] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V103 rat13 [V103,T47] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V104 rat14 [V104,T48] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V105 rat15 [V105,T22] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V106 rat16 [V106,T49] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V107 rat17 [V107,T50] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V108 rat18 [V108,T23] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V109 rat19 [V109,T51] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V110 rat20 [V110,T52] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V111 rat21 [V111,T24] ( 5, 10 ) double -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V112 rat22 [V112,T53] ( 3, 6 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V113 rat23 [V113,T54] ( 3, 6 ) simd16 -> mm6 "ReplaceWithLclVar is creating a new local variable"
+; V114 rat24 [V114,T25] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V115 rat25 [V115,T55] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V116 rat26 [V116,T56] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V117 rat27 [V117,T26] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V118 rat28 [V118,T57] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V119 rat29 [V119,T58] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V120 rat30 [V120,T27] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V121 rat31 [V121,T59] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V122 rat32 [V122,T60] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V123 rat33 [V123,T28] ( 5, 10 ) double -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V124 rat34 [V124,T61] ( 3, 6 ) simd16 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V125 rat35 [V125,T62] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V126 rat36 [V126,T29] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V127 rat37 [V127,T63] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V128 rat38 [V128,T64] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V129 rat39 [V129,T30] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V130 rat40 [V130,T65] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V131 rat41 [V131,T66] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V132 rat42 [V132,T31] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V133 rat43 [V133,T67] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V134 rat44 [V134,T68] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
+; V135 rat45 [V135,T32] ( 5, 10 ) double -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V136 rat46 [V136,T69] ( 3, 6 ) simd16 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+; V137 rat47 [V137,T70] ( 3, 6 ) simd16 -> mm7 "ReplaceWithLclVar is creating a new local variable"
;
-; Lcl frame size = 120
+; Lcl frame size = 256
G_M39315_IG01:
- sub rsp, 120
- vmovups zmm0, zmmword ptr [rsp+0x80]
- ;; size=12 bbWeight=1 PerfScore 3.25
+ push rbp
+ sub rsp, 256
+ lea rbp, [rsp+0x100]
+ ;; size=16 bbWeight=1 PerfScore 1.75
G_M39315_IG02:
- vmovaps zmm1, zmm0
- vcvttps2udq ymm1, ymm1
- vextractf64x4 ymm0, zmm0, 1
- vcvttps2udq ymm0, ymm0
- vmovups zmm2, zmmword ptr [rsp]
- vinsertf64x4 zmm1, zmm2, ymm1, 0
- vinsertf64x4 zmm0, zmm1, ymm0, 1
- vmovups zmmword ptr [rdi], zmm0
- mov rax, rdi
- ;; size=55 bbWeight=1 PerfScore 17.50
+ vmovups ymm0, ymmword ptr [rbp+0x10]
+ vmovaps ymm1, ymm0
+ vmovaps xmmword ptr [rbp-0x10], xmm1
+ mov rax, qword ptr [rbp-0x10]
+ mov qword ptr [rbp-0x20], rax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x20]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vxorps xmm5, xmm5, xmm5
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0x18], eax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x1C]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0x14], eax
+ mov rax, qword ptr [rbp-0x18]
+ mov rcx, qword ptr [rbp-0x08]
+ mov qword ptr [rbp-0x30], rcx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x30]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0x28], ecx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0x2C]
+ vmovddup xmm2, xmm1
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm1, xmm2, xmm1
+ vcmppd xmm2, xmm1, xmm6, 13
+ vblendvpd xmm1, xmm1, xmm6, xmm2
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0x24], ecx
+ mov rcx, qword ptr [rbp-0x28]
+ ;; size=306 bbWeight=1 PerfScore 134.25
G_M39315_IG03:
+ mov qword ptr [rbp-0x40], rax
+ mov qword ptr [rbp-0x38], rcx
+ vmovaps xmm1, xmmword ptr [rbp-0x40]
+ vextractf128 xmm0, ymm0, 1
+ vmovaps xmmword ptr [rbp-0x50], xmm0
+ mov rax, qword ptr [rbp-0x50]
+ mov qword ptr [rbp-0x60], rax
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x60]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rax, xmm0
+ mov dword ptr [rbp-0x58], eax
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x5C]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rax, xmm0
+ mov dword ptr [rbp-0x54], eax
+ mov rax, qword ptr [rbp-0x58]
+ mov rcx, qword ptr [rbp-0x48]
+ mov qword ptr [rbp-0x70], rcx
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x70]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rcx, xmm0
+ mov dword ptr [rbp-0x68], ecx
+ vcvtss2sd xmm0, xmm0, dword ptr [rbp-0x6C]
+ vmovddup xmm2, xmm0
+ vmovddup xmm3, xmm0
+ vmovddup xmm4, xmm0
+ vmovddup xmm0, xmm0
+ vmovups xmm6, xmmword ptr [reloc @RWD00]
+ vcmppd xmm2, xmm3, xmm2, 0
+ vcmppd xmm3, xmm4, xmm5, 13
+ vandpd xmm2, xmm3, xmm2
+ vandpd xmm0, xmm2, xmm0
+ vcmppd xmm2, xmm0, xmm6, 13
+ vblendvpd xmm0, xmm0, xmm6, xmm2
+ vcvttsd2si rcx, xmm0
+ mov dword ptr [rbp-0x64], ecx
+ ;; size=308 bbWeight=1 PerfScore 135.67
+G_M39315_IG04:
+ mov rcx, qword ptr [rbp-0x68]
+ mov qword ptr [rbp-0x80], rax
+ mov qword ptr [rbp-0x78], rcx
+ vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
+ vmovups ymm1, ymmword ptr [rbp+0x30]
+ vmovaps ymm2, ymm1
+ vmovaps xmmword ptr [rbp-0x90], xmm2
+ mov rax, qword ptr [rbp-0x90]
+ mov qword ptr [rbp-0xA0], rax
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xA0]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vxorps xmm6, xmm6, xmm6
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rax, xmm2
+ mov dword ptr [rbp-0x98], eax
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0x9C]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rax, xmm2
+ mov dword ptr [rbp-0x94], eax
+ mov rax, qword ptr [rbp-0x98]
+ mov rcx, qword ptr [rbp-0x88]
+ mov qword ptr [rbp-0xB0], rcx
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xB0]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rcx, xmm2
+ mov dword ptr [rbp-0xA8], ecx
+ vcvtss2sd xmm2, xmm2, dword ptr [rbp-0xAC]
+ vmovddup xmm3, xmm2
+ vmovddup xmm4, xmm2
+ vmovddup xmm5, xmm2
+ vmovddup xmm2, xmm2
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm2, xmm3, xmm2
+ ;; size=341 bbWeight=1 PerfScore 129.25
+G_M39315_IG05:
+ vcmppd xmm3, xmm2, xmm7, 13
+ vblendvpd xmm2, xmm2, xmm7, xmm3
+ vcvttsd2si rcx, xmm2
+ mov dword ptr [rbp-0xA4], ecx
+ mov rcx, qword ptr [rbp-0xA8]
+ mov qword ptr [rbp-0xC0], rax
+ mov qword ptr [rbp-0xB8], rcx
+ vmovaps xmm2, xmmword ptr [rbp-0xC0]
+ vextractf128 xmm1, ymm1, 1
+ vmovaps xmmword ptr [rbp-0xD0], xmm1
+ mov rax, qword ptr [rbp-0xD0]
+ mov qword ptr [rbp-0xE0], rax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xE0]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0xD8], eax
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xDC]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rax, xmm1
+ mov dword ptr [rbp-0xD4], eax
+ mov rax, qword ptr [rbp-0xD8]
+ mov rcx, qword ptr [rbp-0xC8]
+ mov qword ptr [rbp-0xF0], rcx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xF0]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0xE8], ecx
+ vcvtss2sd xmm1, xmm1, dword ptr [rbp-0xEC]
+ vmovddup xmm3, xmm1
+ vmovddup xmm4, xmm1
+ vmovddup xmm5, xmm1
+ vmovddup xmm1, xmm1
+ vmovups xmm7, xmmword ptr [reloc @RWD00]
+ vcmppd xmm3, xmm4, xmm3, 0
+ vcmppd xmm4, xmm5, xmm6, 13
+ vandpd xmm3, xmm4, xmm3
+ ;; size=362 bbWeight=1 PerfScore 136.33
+G_M39315_IG06:
+ vandpd xmm1, xmm3, xmm1
+ vcmppd xmm3, xmm1, xmm7, 13
+ vblendvpd xmm1, xmm1, xmm7, xmm3
+ vcvttsd2si rcx, xmm1
+ mov dword ptr [rbp-0xE4], ecx
+ mov rcx, qword ptr [rbp-0xE8]
+ mov qword ptr [rbp-0x100], rax
+ mov qword ptr [rbp-0xF8], rcx
+ vinserti128 ymm1, ymm2, xmmword ptr [rbp-0x100], 1
+ vmovups ymmword ptr [rdi], ymm0
+ vmovups ymmword ptr [rdi+0x20], ymm1
+ mov rax, rdi
+ ;; size=69 bbWeight=1 PerfScore 22.58
+G_M39315_IG07:
vzeroupper
- add rsp, 120
+ add rsp, 256
+ pop rbp
ret
- ;; size=8 bbWeight=1 PerfScore 2.25
+ ;; size=12 bbWeight=1 PerfScore 2.75
+RWD00 dq 41EFFFFFFFE00000h, 41EFFFFFFFE00000h
+
-; Total bytes of code 75, prolog size 4, PerfScore 23.00, instruction count 14, allocated bytes for code 75 (MethodHash=c491666c) for method System.Runtime.Intrinsics.Vector512:ConvertToUInt32Native(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)
+; Total bytes of code 1414, prolog size 16, PerfScore 562.58, instruction count 282, allocated bytes for code 1414 (MethodHash=c491666c) for method System.Runtime.Intrinsics.Vector512:ConvertToUInt32Native(System.Runtime.Intrinsics.Vector512`1[float]):System.Runtime.Intrinsics.Vector512`1[uint] (FullOpts)

Larger list of diffs: https://gist.github.com/MihuBot/0ef61b753fd0ded9926458c1bc385801
@EgorBo ^ this one is with an Intel CPU
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Job completed in 20 minutes.
Diffs
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: