Skip to content

Commit

Permalink
Reland "[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` (#111284)" (#111998)
Browse files Browse the repository at this point in the history

Relands #111284. Test failure with stage2 build has been fixed by
#111946.


Some targets have better codegen for `ctpop(X) u< 2` than for
`ctpop(X) == 1`. Since #100899, the range of ctpop's return value is set
to indicate when the argument (and therefore the result) is non-zero.

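This patch uses that information in CodeGenPrepare (CGP): when the ctpop
result is known to be non-zero, `ctpop(X) == 1` is converted into
`ctpop(X) u< 2` and `ctpop(X) != 1` into `ctpop(X) u> 1`. This fixes #95255.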
  • Loading branch information
dtcxzyw authored Oct 15, 2024
1 parent 8225938 commit 637e81f
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 15 deletions.
28 changes: 28 additions & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,31 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
return false;
}

/// Rewrite an equality comparison of a population count against one as an
/// unsigned range check, since some targets generate better code for
/// `ctpop(X) u< 2` than for `ctpop(X) == 1`.
///
/// When the ctpop result is known to be non-zero:
///   `ctpop(X) == 1`  becomes  `ctpop(X) u< 2`
///   `ctpop(X) != 1`  becomes  `ctpop(X) u> 1`
///
/// \p Cmp is updated in place. \p TLI and \p TTI are not consulted here.
/// \returns true if \p Cmp was modified, false otherwise.
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
                               const TargetTransformInfo &TTI,
                               const DataLayout &DL) {
  // Only `icmp eq/ne (ctpop ...), 1` is of interest.
  ICmpInst::Predicate Pred;
  if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())) ||
      !ICmpInst::isEquality(Pred))
    return false;

  auto *PopCount = cast<IntrinsicInst>(Cmp->getOperand(0));

  // A count that may be zero would satisfy `u< 2` without being equal to one,
  // so the rewrite is only valid for a provably non-zero result.
  if (!isKnownNonZero(PopCount, DL))
    return false;

  if (Pred != ICmpInst::ICMP_EQ) {
    // `!= 1` with a non-zero count is the same as `u> 1`; the constant stays.
    Cmp->setPredicate(ICmpInst::ICMP_UGT);
    return true;
  }

  // `== 1` with a non-zero count is the same as `u< 2`.
  Cmp->setOperand(1, ConstantInt::get(PopCount->getType(), 2));
  Cmp->setPredicate(ICmpInst::ICMP_ULT);
  return true;
}

bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
Expand All @@ -2130,6 +2155,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
return true;

if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
return true;

return false;
}

Expand Down
68 changes: 61 additions & 7 deletions llvm/test/CodeGen/AArch64/arm64-popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt32_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
; CHECK-NONEON-NEXT: mov w8, #16843009
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
Expand Down Expand Up @@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
; CHECK-NONEON-NEXT: mov w8, #16843009
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
Expand Down Expand Up @@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt64_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
Expand Down Expand Up @@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt32:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w9, w0, #1
; CHECK-NEXT: mov w8, #16843009
; CHECK-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NEXT: and w9, w9, #0x55555555
; CHECK-NEXT: sub w9, w0, w9
; CHECK-NEXT: lsr w10, w9, #2
Expand All @@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt32:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
; CHECK-NONEON-NEXT: mov w8, #16843009
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
Expand All @@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt64:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x9, x0, #1
; CHECK-NEXT: mov x8, #72340172838076673
; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NEXT: sub x9, x0, x9
; CHECK-NEXT: lsr x10, x9, #2
Expand All @@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt64:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
Expand Down Expand Up @@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
ret i1 %cmp
}

define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; CHECK-LABEL: ctpop32_eq_one_nonzero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub w8, w0, #1
; CHECK-NEXT: tst w0, w8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
; CHECK-NONEON: // %bb.0: // %entry
; CHECK-NONEON-NEXT: sub w8, w0, #1
; CHECK-NONEON-NEXT: tst w0, w8
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: ret
;
; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
; CHECK-CSSC: // %bb.0: // %entry
; CHECK-CSSC-NEXT: sub w8, w0, #1
; CHECK-CSSC-NEXT: tst w0, w8
; CHECK-CSSC-NEXT: cset w0, eq
; CHECK-CSSC-NEXT: ret
entry:
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; CHECK-LABEL: ctpop32_ne_one_nonzero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub w8, w0, #1
; CHECK-NEXT: tst w0, w8
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
; CHECK-NONEON: // %bb.0: // %entry
; CHECK-NONEON-NEXT: sub w8, w0, #1
; CHECK-NONEON-NEXT: tst w0, w8
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: ret
;
; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
; CHECK-CSSC: // %bb.0: // %entry
; CHECK-CSSC-NEXT: sub w8, w0, #1
; CHECK-CSSC-NEXT: tst w0, w8
; CHECK-CSSC-NEXT: cset w0, ne
; CHECK-CSSC-NEXT: ret
entry:
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}

declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/RISCV/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
%3 = sext i16 %sext to i32
ret i32 %3
}

define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; RV32I-LABEL: ctpop32_eq_one_nonzero:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
; RV32ZBB: # %bb.0: # %entry
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: ret
entry:
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; RV32I-LABEL: ctpop32_ne_one_nonzero:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
; RV32ZBB: # %bb.0: # %entry
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
entry:
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}
81 changes: 81 additions & 0 deletions llvm/test/CodeGen/RISCV/rv64zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1618,3 +1618,84 @@ entry:
%5 = add nsw i32 %4, %0
ret i32 %5
}

define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; RV64I-LABEL: ctpop32_eq_one_nonzero:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: ret
entry:
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; RV64I-LABEL: ctpop32_ne_one_nonzero:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: xori a0, a0, 1
; RV64ZBB-NEXT: ret
entry:
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop64_eq_one_nonzero(i64 %x) {
; RV64I-LABEL: ctpop64_eq_one_nonzero:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: ret
entry:
%popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp eq i64 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop32_eq_one_maybezero(i32 %x) {
; RV64I-LABEL: ctpop32_eq_one_maybezero:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addiw a1, a0, -1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: seqz a0, a0
; RV64ZBB-NEXT: ret
entry:
%popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}
45 changes: 43 additions & 2 deletions llvm/test/CodeGen/X86/ispow2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
; CHECK-AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-AVX512-NEXT: vzeroupper
Expand Down Expand Up @@ -155,7 +155,7 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
; CHECK-AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-AVX512-NEXT: vzeroupper
Expand Down Expand Up @@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
%r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i1> %r
}


define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
; CHECK-NOBMI: # %bb.0: # %entry
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
; CHECK-NOBMI-NEXT: testl %eax, %edi
; CHECK-NOBMI-NEXT: sete %al
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
; CHECK-BMI2: # %bb.0: # %entry
; CHECK-BMI2-NEXT: blsrl %edi, %eax
; CHECK-BMI2-NEXT: sete %al
; CHECK-BMI2-NEXT: retq
entry:
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}

define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
; CHECK-NOBMI: # %bb.0: # %entry
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
; CHECK-NOBMI-NEXT: testl %eax, %edi
; CHECK-NOBMI-NEXT: setne %al
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
; CHECK-BMI2: # %bb.0: # %entry
; CHECK-BMI2-NEXT: blsrl %edi, %eax
; CHECK-BMI2-NEXT: setne %al
; CHECK-BMI2-NEXT: retq
entry:
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/known-never-zero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -555,9 +555,9 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: pxor %xmm1, %xmm0
; X86-NEXT: pcmpgtd %xmm1, %xmm0
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpeqd %xmm1, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: retl
;
Expand All @@ -566,10 +566,10 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
; X64-NEXT: retq
%z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
Expand Down

0 comments on commit 637e81f

Please sign in to comment.