-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Codegen: Bound check not elided in ref struct Span field index access #72004
Comments
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue DetailsRepro: internal ref struct Foo
{
private readonly Span<int> _span;
internal Foo(Span<int> span) => _span = span;
internal void Set(int index)
{
if ((uint)index < (uint)_span.Length)
{
_span[index] = 42;
}
}
} Codegen (x64): sub rsp,38
xor eax,eax
mov [rsp+28],rax
mov [rsp+30],rax
mov rax,[rcx+8]
test rax,rax
je short M00_L02
lea rdx,[rax+10]
mov eax,[rax+8]
M00_L00:
lea rcx,[rsp+28]
mov [rcx],rdx
mov [rcx+8],eax
cmp dword ptr [rsp+30],3
jbe short M00_L01
lea rdx,[rsp+28]
cmp dword ptr [rdx+8],3 ; redundant check for bound-check -- should be elided
jbe short M00_L03
mov rax,[rdx]
mov dword ptr [rax+0C],2A
M00_L01:
add rsp,38
ret
M00_L02:
xor edx,edx
xor eax,eax
jmp short M00_L00
M00_L03:
call CORINFO_HELP_RNGCHKFAIL
int 3
; Total bytes of code 88 Workaround: fetch the span to a local. +Span<int> span = _span;
-if ((uint)index < (uint)_span.Length)
+if ((uint)index < (uint)span.Length)
{
- _span[index] = 42;
+ span[index] = 42;
} When the index is not constant (here 3), the same issue occurs. Complete listing of code (C#)using BenchmarkDotNet.Attributes;
BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>();
[DisassemblyDiagnoser]
[ShortRunJob]
public class Bench
{
private readonly int[] _array = new int[100];
[Benchmark]
public void NoLocalSpan()
{
Foo foo = new(_array);
foo.Set(3);
}
[Benchmark]
public void LocalSpan()
{
Foo1 foo = new(_array);
foo.Set(3);
}
}
internal ref struct Foo
{
private readonly Span<int> _span;
internal Foo(Span<int> span) => _span = span;
internal void Set(int index)
{
if ((uint)index < (uint)_span.Length)
{
_span[index] = 42;
}
}
}
internal ref struct Foo1
{
private readonly Span<int> _span;
internal Foo1(Span<int> span) => _span = span;
internal void Set(int index)
{
Span<int> span = _span;
if ((uint)index < (uint)span.Length)
{
span[index] = 42;
}
}
} Complete listing of generated code; Bench.NoLocalSpan()
sub rsp,38
xor eax,eax
mov [rsp+28],rax
mov [rsp+30],rax
mov rax,[rcx+8]
test rax,rax
je short M00_L02
lea rdx,[rax+10]
mov eax,[rax+8]
M00_L00:
lea rcx,[rsp+28]
mov [rcx],rdx
mov [rcx+8],eax
cmp dword ptr [rsp+30],3
jbe short M00_L01
lea rdx,[rsp+28]
cmp dword ptr [rdx+8],3
jbe short M00_L03
mov rax,[rdx]
mov dword ptr [rax+0C],2A
M00_L01:
add rsp,38
ret
M00_L02:
xor edx,edx
xor eax,eax
jmp short M00_L00
M00_L03:
call CORINFO_HELP_RNGCHKFAIL
int 3
; Total bytes of code 88
; Bench.LocalSpan()
sub rsp,18
xor eax,eax
mov [rsp+8],rax
mov [rsp+10],rax
mov rax,[rcx+8]
test rax,rax
je short M00_L02
lea rdx,[rax+10]
mov eax,[rax+8]
M00_L00:
lea rcx,[rsp+8]
mov [rcx],rdx
mov [rcx+8],eax
lea rdx,[rsp+8]
mov rax,[rdx]
mov edx,[rdx+8]
cmp edx,3
jbe short M00_L01
mov dword ptr [rax+0C],2A
M00_L01:
add rsp,18
ret
M00_L02:
xor edx,edx
xor eax,eax
jmp short M00_L00
; Total bytes of code 77 Cf. #67448 (comment)
|
Hm, interesting. Tried it with https://github.com/dotnet/installer version 7.0.100-preview7.22362.1 (win-x64, current main as of now), and there is a bound-check. |
|
hm.. actually it might be PGO... 🤔 |
So, in some cases JIT doesn't eliminate bound checks inside cold blocks - it's pretty normal behavior to me. So an example where it happens in non-cold block would be nice to have. Example: internal ref struct Foo
{
private readonly Span<int> _span;
internal Foo(Span<int> span) => _span = span;
[MethodImpl(MethodImplOptions.NoInlining)]
internal void Set(int index)
{
if ((uint)index < (uint)_span.Length)
_span[index] = 42;
}
}
public class Program
{
static void Main()
{
Foo f = new Foo(new [] {1,2,3,4});
for (int i = 0; i < 100; i++)
{
f.Set(1);
Thread.Sleep(16);
}
}
} Here is the codegen for ; Assembly listing for method Foo:Set(int):this
8B4108 mov eax, dword ptr [rcx+8]
3BD0 cmp edx, eax
730C jae SHORT G_M45610_IG03
488B01 mov rax, bword ptr [rcx]
8BD2 mov edx, edx
C704902A000000 mov dword ptr [rax+4*rdx], 42
;; size=19 bbWeight=1 PerfScore 6.50
G_M45610_IG03: ;; offset=0013H
C3 ret Now let me change the call from ; Assembly listing for method Foo:Set(int):this
4883EC28 sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M45610_IG02: ;; offset=0004H
3B5108 cmp edx, dword ptr [rcx+8]
7205 jb SHORT G_M45610_IG04
;; size=5 bbWeight=1 PerfScore 4.00
G_M45610_IG03: ;; offset=0009H
4883C428 add rsp, 40
C3 ret
;; size=5 bbWeight=1 PerfScore 1.25
G_M45610_IG04: ;; offset=000EH
3B5108 cmp edx, dword ptr [rcx+8]
730E jae SHORT G_M45610_IG05
488B01 mov rax, bword ptr [rcx]
8BD2 mov edx, edx
C704902A000000 mov dword ptr [rax+4*rdx], 42
EBE8 jmp SHORT G_M45610_IG03
;; size=19 bbWeight=0 PerfScore 0.00
G_M45610_IG05: ;; offset=0021H
E87AAC4B5F call CORINFO_HELP_RNGCHKFAIL
CC int3 A stale/generic PGO might bake profile that doesn't work well for currently tested app and can mess it up but presumably it's a rare case. cc @AndyAyersMS |
This is what I'd except too. But when run via BenchmarkDotNet it should be hot enough to get "optimized" Tier-1 code. May this be related to something in BDN and a preview version of .NET? |
Ok, I think I can reproduce yeah |
Repro:
Codegen (x64):
Workaround: fetch the span to a local.
When the index is not constant (here 3), the same issue occurs.
Complete listing of code (C#)
Complete listing of generated code
Cf. #67448 (comment)
category:cq
theme:bounds-checks
skill-level:intermediate
cost:small
impact:small
The text was updated successfully, but these errors were encountered: