From 57b50ef0174b6fdf7b554c4ae2691e2fa4f78f96 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Wed, 17 Jan 2024 11:02:05 +0100 Subject: [PATCH] [AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode. (#71556) Substitute with zero-extended to i64 ballot.i32 intrinsic. --- clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl | 8 ++------ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- .../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 13 +++++++++++++ .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 6 ++++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index 43553131f63c54..a0e27ce22fe7d9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -24,13 +24,11 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) } // CHECK-LABEL: @test_read_exec( -// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) void test_read_exec(global uint* out) { *out = __builtin_amdgcn_read_exec(); } -// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] - // CHECK-LABEL: @test_read_exec_lo( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) void test_read_exec_lo(global uint* out) { @@ -38,9 +36,7 @@ void test_read_exec_lo(global uint* out) { } // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) -// CHECK: lshr i64 [[A:%.*]], 32 -// CHECK: trunc i64 [[B:%.*]] to i32 +// CHECK: store i32 0, ptr addrspace(1) %out void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index e753b75dbbf492..d21a17b7f8a287 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { auto CC = cast(Cond->getOperand(2))->get(); if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isNullConstant(Cond->getOperand(1)) && - // TODO: make condition below an assert after fixing ballot bitwidth. + // We may encounter ballot.i64 in wave32 mode on -O0. VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) { // %VCMP = i(WaveSize) AMDGPUISD::SETCC ... // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 898289019c7189..3c2351673be5c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -990,6 +990,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType())); } } + if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) { + // %b64 = call i64 ballot.i64(...) + // => + // %b32 = call i32 ballot.i32(...) + // %b64 = zext i32 %b32 to i64 + Value *Call = IC.Builder.CreateZExt( + IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot, + {IC.Builder.getInt32Ty()}, + {II.getArgOperand(0)}), + II.getType()); + Call->takeName(&II); + return IC.replaceInstUsesWith(II, Call); + } break; } case Intrinsic::amdgcn_wqm_vote: { diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 804283cc20cd6a..94c32e3cbe99f7 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -2599,7 +2599,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent define i64 @ballot_nocombine_64(i1 %i) { ; CHECK-LABEL: @ballot_nocombine_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]]) +; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i) @@ -2616,7 +2617,8 @@ define i64 @ballot_zero_64() { define i64 @ballot_one_64() { ; CHECK-LABEL: @ballot_one_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)