From bdd6c96ad324dab0ca1b845ff8289b2b893de23a Mon Sep 17 00:00:00 2001
From: Stephen Clarke
Date: Wed, 15 May 2024 15:07:22 +0100
Subject: [PATCH 1/2] [CHERI][InstCombine] Add test for 8-byte-capability-sized memcpy

---
 .../InstCombine/cheri-memcpy-64bit-cap.ll     | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll

diff --git a/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll b/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll
new file mode 100644
index 000000000000..da3ad71c5c60
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200"
+
+;; memcpy has capability size and alignment, and may copy a capability
+
+define ptr addrspace(200) @test_memcpy_cap(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy_cap(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 8
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 8
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 8 %d, ptr addrspace(200) align 8 %s, i32 8, i1 false) must_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+;; memcpy may contain a capability, but alignment is not known
+
+define ptr addrspace(200) @test_memcpy8_align_unknown(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy8_align_unknown(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 1
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 1
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) %d, ptr addrspace(200) %s, i32 8, i1 false) must_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+;; memcpy may contain a capability, but alignment may not be sufficient for capability load/store
+
+define ptr addrspace(200) @test_memcpy8_align_4(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy8_align_4(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 4
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 4
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 4 %d, ptr addrspace(200) align 4 %s, i32 8, i1 false) must_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+;; memcpy does not copy capabilities, so can be transformed to regular load/store
+
+define ptr addrspace(200) @test_memcpy8_nocap(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy8_nocap(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 8
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 8
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 8 %d, ptr addrspace(200) align 8 %s, i32 8, i1 false) no_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+;; memcpy has unknown alignment but does not copy capabilities, so can be transformed to regular load/store
+
+define ptr addrspace(200) @test_memcpy8_nocap_align_unknown(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy8_nocap_align_unknown(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 1
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 1
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) %d, ptr addrspace(200) %s, i32 8, i1 false) no_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+;; memcpy has align 4 and does not copy capabilities, so can be transformed to regular load/store
+
+define ptr addrspace(200) @test_memcpy8_nocap_align_4(ptr addrspace(200) %d, ptr addrspace(200) %s) {
+; CHECK-LABEL: @test_memcpy8_nocap_align_4(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 4
+; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 4
+; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
+;
+  call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 4 %d, ptr addrspace(200) align 4 %s, i32 8, i1 false) no_preserve_cheri_tags
+  ret ptr addrspace(200) %d
+}
+
+declare void @llvm.memcpy.p200.p200.i32(ptr addrspace(200), ptr addrspace(200), i32, i1 immarg)

From 735c06714a2ebec306da643f350b608e2e68fe72 Mon Sep 17 00:00:00 2001
From: Stephen Clarke
Date: Wed, 15 May 2024 17:36:22 +0100
Subject: [PATCH 2/2] [CHERI][InstCombine] Fix 8-byte capability memcpy inlining

---
 .../InstCombine/InstCombineCalls.cpp          | 22 +++++++++----------
 .../InstCombine/cheri-memcpy-64bit-cap.ll     | 10 ++++-----
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8ab7ed62e660..4cd3eee98035 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -163,21 +163,21 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
   assert(Size && "0-sized memory transferring should be removed already.");
 
   Type *CpyTy = nullptr;
-  if (Size > 8 || (Size&(Size-1))) {
-    // This heuristic is silly, because it prevents us from doing vector
-    // loads and stores. It also means that on CHERI we weren't optimising
-    // single-pointer copies. For now, special case pointer signed and aligned
-    // things for CHERI.
-    if (!DL.isFatPointer(200))
-      return nullptr; // If not 1/2/4/8 bytes, exit.
-    uint64_t PtrCpySize = DL.getPointerSize(200);
+  // Special case pointer sized and aligned things for CHERI.
+  if (DL.isFatPointer(200) && Size == DL.getPointerSize(200) &&
+      MI->shouldPreserveCheriTags() != PreserveCheriTags::Unnecessary) {
+    // May contain a capability
     Align PtrCpyAlign = DL.getPointerPrefAlignment(200);
-    if ((Size != PtrCpySize) || (CopyDstAlign && *CopyDstAlign < PtrCpyAlign) ||
-        (CopySrcAlign && *CopySrcAlign < PtrCpyAlign))
+    if (CopyDstAlign && *CopyDstAlign >= PtrCpyAlign &&
+        CopySrcAlign && *CopySrcAlign >= PtrCpyAlign)
+      CpyTy = Type::getInt8PtrTy(MI->getContext(), 200);
+    else
       return nullptr;
-    CpyTy = Type::getInt8PtrTy(MI->getContext(), 200);
   }
 
+  if (!CpyTy && (Size > 8 || (Size&(Size-1))))
+    return nullptr; // If not 1/2/4/8 bytes, exit.
+
   // If it is an atomic and alignment is less than the size then we will
   // introduce the unaligned memory access which will be later transformed
   // into libcall in CodeGen. This is not evident performance gain so disable
diff --git a/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll b/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll
index da3ad71c5c60..b41fe0fa1a27 100644
--- a/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll
+++ b/llvm/test/Transforms/InstCombine/cheri-memcpy-64bit-cap.ll
@@ -6,8 +6,8 @@ target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G
 
 define ptr addrspace(200) @test_memcpy_cap(ptr addrspace(200) %d, ptr addrspace(200) %s) {
 ; CHECK-LABEL: @test_memcpy_cap(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 8
-; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[S:%.*]], align 8
+; CHECK-NEXT:    store ptr addrspace(200) [[TMP1]], ptr addrspace(200) [[D:%.*]], align 8
 ; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
 ;
   call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 8 %d, ptr addrspace(200) align 8 %s, i32 8, i1 false) must_preserve_cheri_tags
@@ -18,8 +18,7 @@ define ptr addrspace(200) @test_memcpy_cap(ptr addrspace(
 
 define ptr addrspace(200) @test_memcpy8_align_unknown(ptr addrspace(200) %d, ptr addrspace(200) %s) {
 ; CHECK-LABEL: @test_memcpy8_align_unknown(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 1
-; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) noundef nonnull align 1 dereferenceable(8) [[D:%.*]], ptr addrspace(200) noundef nonnull align 1 dereferenceable(8) [[S:%.*]], i32 8, i1 false) #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
 ;
   call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) %d, ptr addrspace(200) %s, i32 8, i1 false) must_preserve_cheri_tags
@@ -30,8 +29,7 @@
 
 define ptr addrspace(200) @test_memcpy8_align_4(ptr addrspace(200) %d, ptr addrspace(200) %s) {
 ; CHECK-LABEL: @test_memcpy8_align_4(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(200) [[S:%.*]], align 4
-; CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(200) [[D:%.*]], align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) noundef nonnull align 4 dereferenceable(8) [[D:%.*]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(8) [[S:%.*]], i32 8, i1 false) #[[ATTR1]]
 ; CHECK-NEXT:    ret ptr addrspace(200) [[D]]
 ;
   call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) align 4 %d, ptr addrspace(200) align 4 %s, i32 8, i1 false) must_preserve_cheri_tags