-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT does not always fold inlined constants #62604
Comments
Tagging subscribers to this area: @JulieLeeMSFT
Issue Details: It appears that the JIT (x64) sometimes misses opportunities for folding inlined(?) constants. There are two pairs of methods, CalculateMaxSize[No]ExtraAdd and CalculateMaxSize[No]ExtraInc.
using System.Runtime.CompilerServices;
// Holder for a constant-valued static property used as one of the addends in the repro methods.
internal readonly struct SubId {
// Always returns 16; in the JIT listings this getter shows up as the immediate 16.
public static int MaxSize => 16;
}
internal readonly struct MyStruct
{
// Two-byte CR LF sequence exposed as a read-only span; the repro methods only read its Length (2).
public static ReadOnlySpan<byte> EoL => new []{(byte)'\r', (byte)'\n'};
// Array whose Length is the only non-constant addend in the repro methods.
internal readonly byte[] _subject;
// Stores the given array reference in _subject as-is (no copy, no null check).
public MyStruct(byte[] subject){
_subject = subject;
}
/*
Expected case: both constants come first, and the JIT folds
SubId.MaxSize (16) + EoL.Length (2) into a single immediate (18).
Reported x64 output:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 18
*/
// NoInlining keeps this variant as a separate method with its own listing.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeNoExtraAdd()
{
// Operand order matters for this repro: constants first, array length last.
return SubId.MaxSize + EoL.Length + _subject.Length;
}
/*
Problem case: same addends as CalculateMaxSizeNoExtraAdd, but with the array
length first. The two constants are not folded; the JIT emits two adds (16, then 2)
instead of a single add of 18.
Reported x64 output:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 16
add eax, 2
*/
// NoInlining keeps this variant as a separate method with its own listing.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeExtraAdd()
{
// Per the report, two add instructions are emitted here even when SubId.MaxSize is made a const field.
// Operand order matters for this repro: array length first, then the two constants.
return _subject.Length + SubId.MaxSize + EoL.Length;
}
/*
Expected case: array length, then SubId.MaxSize (16), then the literal 1.
The JIT folds the two constants into a single immediate (17).
Reported x64 output:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 17
*/
// NoInlining keeps this variant as a separate method with its own listing.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeNoExtraInc()
{
// Operand order matters for this repro: the inlined property comes before the literal.
return _subject.Length + SubId.MaxSize + 1;
}
/*
Problem case: same addends as CalculateMaxSizeNoExtraInc, but with the literal 1
before SubId.MaxSize. The constants are not folded; the JIT emits an inc followed
by an add of 16 instead of a single add of 17.
Reported x64 output:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
inc eax
add eax, 16
*/
// NoInlining keeps this variant as a separate method with its own listing.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeExtraInc()
{
// Operand order matters for this repro: the literal comes before the inlined property.
return _subject.Length + 1 + SubId.MaxSize;
}
}
SharpLab and JIT dump running 1d4b5f6
; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;* V01 loc0 [V01 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op
;# V02 OutArgs [V02 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V03 tmp1 [V03 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
;* V04 tmp2 [V04 ] ( 0, 0 ) struct (16) zero-ref "NewObj constructor temp"
;* V05 tmp3 [V05 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
;* V06 tmp4 [V06 ] ( 0, 0 ) byref -> zero-ref single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V07 tmp5 [V07 ] ( 0, 0 ) int -> zero-ref single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V08 tmp6 [V08 ] ( 0, 0 ) byref -> zero-ref V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;* V09 tmp7 [V09 ] ( 0, 0 ) int -> zero-ref V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;* V10 tmp8 [V10 ] ( 0, 0 ) byref -> zero-ref single-def V05._value(offs=0x00) P-INDEP "field V05._value (fldOffset=0x0)"
;
; Lcl frame size = 0
G_M57648_IG01:
;; bbWeight=1 PerfScore 0.00
G_M57648_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 18
;; bbWeight=1 PerfScore 4.25
G_M57648_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=f2321ecf) for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;* V01 loc0 [V01 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op
;# V02 OutArgs [V02 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V03 tmp1 [V03,T01] ( 2, 4 ) int -> rax "impAppendStmt"
; V04 tmp2 [V04,T02] ( 2, 4 ) int -> rax "impAppendStmt"
;* V05 tmp3 [V05 ] ( 0, 0 ) struct (16) zero-ref "NewObj constructor temp"
;* V06 tmp4 [V06 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
;* V07 tmp5 [V07 ] ( 0, 0 ) byref -> zero-ref single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V08 tmp6 [V08 ] ( 0, 0 ) int -> zero-ref single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V09 tmp7 [V09 ] ( 0, 0 ) byref -> zero-ref V05._pointer(offs=0x00) P-INDEP "field V05._pointer (fldOffset=0x0)"
;* V10 tmp8 [V10 ] ( 0, 0 ) int -> zero-ref V05._length(offs=0x08) P-INDEP "field V05._length (fldOffset=0x8)"
;* V11 tmp9 [V11 ] ( 0, 0 ) byref -> zero-ref single-def V06._value(offs=0x00) P-INDEP "field V06._value (fldOffset=0x0)"
;
; Lcl frame size = 0
G_M14833_IG01:
;; bbWeight=1 PerfScore 0.00
G_M14833_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 16
add eax, 2
;; bbWeight=1 PerfScore 4.50
G_M14833_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 13, prolog size 0, PerfScore 6.80, instruction count 5, allocated bytes for code 13 (MethodHash=cc45c60e) for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 4 ) int -> rax "impAppendStmt"
;
; Lcl frame size = 0
G_M11669_IG01:
;; bbWeight=1 PerfScore 0.00
G_M11669_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 17
;; bbWeight=1 PerfScore 4.25
G_M11669_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=d8a9d26a) for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 4 ) int -> rax "impAppendStmt"
;
; Lcl frame size = 0
G_M56724_IG01:
;; bbWeight=1 PerfScore 0.00
G_M56724_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
inc eax
add eax, 16
;; bbWeight=1 PerfScore 4.50
G_M56724_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 12, prolog size 0, PerfScore 6.70, instruction count 5, allocated bytes for code 12 (MethodHash=17e2226b) for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; ============================================================
|
More cases that would benefit from forward substitution. See #6973. |
It appears that JIT (x64) sometimes misses opportunities for folding inlined(?) constants.
The following code demonstrates the issue. Depending on the order of the addends in the C# source, the constants are folded to a greater or lesser degree.
I had assumed that the order of the addends in C# would not affect the JIT's ability to fold them.
There are two pairs of methods, CalculateMaxSize[No]ExtraAdd and CalculateMaxSize[No]ExtraInc. The *No* variants look like what I had expected for either implementation.
SharpLab and JIT dump running 1d4b5f6
category:implementation
theme:inlining
skill-level:intermediate
cost:medium
impact:medium
The text was updated successfully, but these errors were encountered: