Skip to content

Commit

Permalink
A more efficient slice comparison implementation for T: !BytewiseEq
Browse files Browse the repository at this point in the history
The previous implementation was not optimized properly by the compiler,
which didn't leverage the fact that both lengths were equal.
  • Loading branch information
krtab committed Oct 17, 2023
1 parent 347452e commit 9348c33
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 1 deletion.
19 changes: 18 additions & 1 deletion library/core/src/slice/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,24 @@ where
return false;
}

self.iter().zip(other.iter()).all(|(x, y)| x == y)
let mut i = self.len();
let mut ptr_self = self.as_ptr();
let mut ptr_other = other.as_ptr();
// SAFETY:
// This is sound because:
// - self.len == other.len
// - self.len <= isize::MAX
// so the two pointers will not overflow,
// will remain in bounds of the slice,
// and dereferencing them is sound.
unsafe {
while (i > 0) && (*ptr_self == *ptr_other) {
i -= 1;
ptr_self = ptr_self.add(1);
ptr_other = ptr_other.add(1);
}
}
i == 0
}
}

Expand Down
97 changes: 97 additions & 0 deletions mre.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
; ModuleID = 'mre.44c9d9ceb0d5fb53-cgu.0'
source_filename = "mre.44c9d9ceb0d5fb53-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Optimized IR for `test`: returns an i1 saying whether two element sequences
; compare equal. The arguments are two (ptr, i64) pairs; presumably these are the
; data pointer and length of each slice argument (%0/%1 and %2/%3) -- confirm
; against the Rust source that produced this module.
; Elements are addressed as `{ i64, i64 }` (see the getelementptr instructions);
; the first field is loaded with !range !18, presumably an Option discriminant.
; Function Attrs: nofree nosync nounwind nonlazybind memory(read) uwtable
define noundef zeroext i1 @test(ptr noalias nocapture noundef nonnull readonly align 8 %0, i64 noundef %1, ptr noalias nocapture noundef nonnull readonly align 8 %2, i64 noundef %3) unnamed_addr #0 {
start:
tail call void @llvm.experimental.noalias.scope.decl(metadata !3)
tail call void @llvm.experimental.noalias.scope.decl(metadata !6)
; Different i64 arguments: jump straight to the exit block, whose phi yields false.
%_3.not.i = icmp eq i64 %1, %3
br i1 %_3.not.i, label %bb3.preheader.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"

; Equal counts: a zero count exits immediately with true, otherwise enter the loop.
bb3.preheader.i: ; preds = %start
%_9.not4.i = icmp eq i64 %1, 0
br i1 %_9.not4.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit", label %bb4.i

; Loop header. The phis carry the remaining element count and the two current
; element pointers, starting at %1, %0 and %2 respectively.
bb4.i: ; preds = %bb3.preheader.i, %bb6.i
%i.07.i = phi i64 [ %6, %bb6.i ], [ %1, %bb3.preheader.i ]
%ptr_self.06.i = phi ptr [ %_14.i, %bb6.i ], [ %0, %bb3.preheader.i ]
%ptr_other.05.i = phi ptr [ %_16.i, %bb6.i ], [ %2, %bb3.preheader.i ]
tail call void @llvm.experimental.noalias.scope.decl(metadata !8)
tail call void @llvm.experimental.noalias.scope.decl(metadata !11)
tail call void @llvm.experimental.noalias.scope.decl(metadata !13)
tail call void @llvm.experimental.noalias.scope.decl(metadata !16)
; Load the first i64 of each element and test each against zero. If either one is
; zero, branch to the block that only compares the first fields; otherwise go on
; to compare the second fields.
%_5.i.i.i = load i64, ptr %ptr_self.06.i, align 8, !range !18, !alias.scope !19, !noalias !20, !noundef !21
%trunc.not.i.i.i = icmp eq i64 %_5.i.i.i, 0
%_3.i.i.i = load i64, ptr %ptr_other.05.i, align 8, !range !18, !alias.scope !20, !noalias !19, !noundef !21
%4 = icmp eq i64 %_3.i.i.i, 0
%brmerge.i.i.i = or i1 %trunc.not.i.i.i, %4
br i1 %brmerge.i.i.i, label %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", label %bb4.i.i.i

; Both first fields are non-zero: load field 1 of each element and compare.
; A mismatch leaves through the exit block with false.
bb4.i.i.i: ; preds = %bb4.i
%_8.i.i.i = getelementptr inbounds { i64, i64 }, ptr %ptr_self.06.i, i64 0, i32 1
%_9.i.i.i = getelementptr inbounds { i64, i64 }, ptr %ptr_other.05.i, i64 0, i32 1
tail call void @llvm.experimental.noalias.scope.decl(metadata !22)
tail call void @llvm.experimental.noalias.scope.decl(metadata !25)
%_3.i.i.i.i = load i64, ptr %_8.i.i.i, align 8, !alias.scope !27, !noalias !28, !noundef !21
%_4.i.i.i.i = load i64, ptr %_9.i.i.i, align 8, !alias.scope !28, !noalias !27, !noundef !21
%_0.i.i.i.i = icmp eq i64 %_3.i.i.i.i, %_4.i.i.i.i
br i1 %_0.i.i.i.i, label %bb6.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"

; At least one first field is zero. OR-ing the two first fields gives zero only
; when both are zero; in that case the elements match and the loop continues,
; otherwise the result is false.
"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i": ; preds = %bb4.i
%5 = or i64 %_3.i.i.i, %_5.i.i.i
%.mux.i.i.i = icmp eq i64 %5, 0
br i1 %.mux.i.i.i, label %bb6.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"

; Loop latch: decrement the remaining count and advance both pointers by one
; `{ i64, i64 }` element. A count of zero exits with true.
bb6.i: ; preds = %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", %bb4.i.i.i
%6 = add i64 %i.07.i, -1
%_14.i = getelementptr inbounds { i64, i64 }, ptr %ptr_self.06.i, i64 1
%_16.i = getelementptr inbounds { i64, i64 }, ptr %ptr_other.05.i, i64 1
%_9.not.i = icmp eq i64 %6, 0
br i1 %_9.not.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit", label %bb4.i

; Single exit. The phi picks the result from the predecessor: true when arriving
; from the zero-count check or from the latch, false from every mismatch path.
"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit": ; preds = %bb4.i.i.i, %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", %bb6.i, %start, %bb3.preheader.i
%_0.0.i = phi i1 [ false, %start ], [ true, %bb3.preheader.i ], [ true, %bb6.i ], [ false, %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i" ], [ false, %bb4.i.i.i ]
ret i1 %_0.0.i
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
declare void @llvm.experimental.noalias.scope.decl(metadata) #1

attributes #0 = { nofree nosync nounwind nonlazybind memory(read) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.74.0-beta.1 (b5c050feb 2023-10-03)"}
!3 = !{!4}
!4 = distinct !{!4, !5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E: %self.0"}
!5 = distinct !{!5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E"}
!6 = !{!7}
!7 = distinct !{!7, !5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E: %other.0"}
!8 = !{!9}
!9 = distinct !{!9, !10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E: %self"}
!10 = distinct !{!10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E"}
!11 = !{!12}
!12 = distinct !{!12, !10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E: %other"}
!13 = !{!14}
!14 = distinct !{!14, !15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E: %l"}
!15 = distinct !{!15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E"}
!16 = !{!17}
!17 = distinct !{!17, !15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E: %r"}
!18 = !{i64 0, i64 2}
!19 = !{!14, !9, !4}
!20 = !{!17, !12, !7}
!21 = !{}
!22 = !{!23}
!23 = distinct !{!23, !24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E: %self"}
!24 = distinct !{!24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E"}
!25 = !{!26}
!26 = distinct !{!26, !24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E: %other"}
!27 = !{!23, !14, !9, !4}
!28 = !{!26, !17, !12, !7}
4 changes: 4 additions & 0 deletions mre.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/// Reports whether the two slices of `Option<u64>` hold equal contents.
///
/// Returns `true` when both slices have the same length and every pair of
/// corresponding elements compares equal, `false` otherwise.
///
/// The function is exported under its unmangled name `test`.
#[no_mangle]
pub fn test(x: &[Option<u64>], y: &[Option<u64>]) -> bool {
    // Compare the slices themselves rather than the references to them; the
    // reference comparison forwards to this same slice equality.
    *x == *y
}
43 changes: 43 additions & 0 deletions mre.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
.text
.file "mre.44c9d9ceb0d5fb53-cgu.0"
.section .text.test,"ax",@progbits
.globl test
.p2align 4, 0x90
.type test,@function
# test: returns 1 in %al when two element sequences compare equal, 0 otherwise.
# Inputs are read from %rdi/%rsi and %rdx/%rcx; presumably these are the
# (pointer, length) pairs of the two slice arguments under the System V
# x86-64 calling convention -- confirm against the target ABI.
# Each element is 16 bytes wide: an 8-byte first word followed by an 8-byte
# second word (see the addressing at .LBB0_5 and the stride at .LBB0_4).
test:
.cfi_startproc
# Differing counts: jump to the path that returns 0.
cmpq %rcx, %rsi
jne .LBB0_9
# Preload the result with 1; a zero count returns it immediately.
movb $1, %al
testq %rsi, %rsi
je .LBB0_10
# %rcx is free after the count comparison and is reused as a byte offset.
# It starts at 8, so -8(base,%rcx) is the first word of the current element
# and (base,%rcx) is its second word.
movl $8, %ecx
jmp .LBB0_5
.p2align 4, 0x90
# Reached when at least one first word is zero: the elements match only if
# both first words are zero, i.e. their OR is zero.
.LBB0_3:
orq %r8, %r9
jne .LBB0_9
# Elements matched: step to the next 16-byte element and decrement the
# remaining count. At zero, return with %al still holding 1.
.LBB0_4:
addq $16, %rcx
decq %rsi
je .LBB0_10
# Loop body: load the first word of each element. If either is zero, let
# .LBB0_3 decide; otherwise compare the second words.
.LBB0_5:
movq -8(%rdi,%rcx), %r8
movq -8(%rdx,%rcx), %r9
testq %r8, %r8
je .LBB0_3
testq %r9, %r9
je .LBB0_3
movq (%rdi,%rcx), %r8
cmpq (%rdx,%rcx), %r8
je .LBB0_4
# Mismatch (or fall-through from the failed compare above): result is 0.
.LBB0_9:
xorl %eax, %eax
.LBB0_10:
retq
.Lfunc_end0:
.size test, .Lfunc_end0-test
.cfi_endproc

.ident "rustc version 1.74.0-beta.1 (b5c050feb 2023-10-03)"
.section ".note.GNU-stack","",@progbits

0 comments on commit 9348c33

Please sign in to comment.