
[AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling #76644

Merged
davemgreen merged 1 commit into llvm:main from snikitav:aarch64-implement-bic-handling on Apr 6, 2024

Conversation

snikitav
Contributor

@snikitav snikitav commented Dec 31, 2023

Fold away the BICi if all of the bits it would clear are already known to be zero.

define <8 x i16> @haddu_known(<8 x i8> %a0, <8 x i8> %a1) {
  %x0 = zext <8 x i8> %a0 to <8 x i16>
  %x1 = zext <8 x i8> %a1 to <8 x i16>
  %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
haddu_known:                            // @haddu_known
        ushll   v0.8h, v0.8b, #0
        ushll   v1.8h, v1.8b, #0
        uhadd   v0.8h, v0.8h, v1.8h
        bic     v0.8h, #254, lsl #8 <-- this instruction can be removed because the inputs are zero-extended, so the high bits are already known to be zero
        ret

Fixes #53881
Fixes #53622

@llvmbot llvmbot added the backend:AArch64 and llvm:SelectionDAG labels Dec 31, 2023
@llvmbot
Collaborator

llvmbot commented Dec 31, 2023

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-aarch64

Author: Sizov Nikita (snikitav)

Full diff: https://github.com/llvm/llvm-project/pull/76644.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+7-2)
  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+24)
  • (added) llvm/test/CodeGen/AArch64/aarch64-neon-bici.ll (+125)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 81facf92e55ae9..c79d3d8246daaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3390,10 +3390,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
       Known = KnownBits::mulhs(Known, Known2);
     break;
   }
-  case ISD::AVGCEILU: {
+  case ISD::AVGFLOORU:
+  case ISD::AVGCEILU:
+  case ISD::AVGFLOORS:
+  case ISD::AVGCEILS: {
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-    Known = Known.zext(BitWidth + 1);
+    Known = (Opcode == ISD::AVGFLOORU || Opcode == ISD::AVGCEILU)
+                ? Known.zext(BitWidth + 1)
+                : Known.sext(BitWidth + 1);
     Known2 = Known2.zext(BitWidth + 1);
     KnownBits One = KnownBits::makeConstant(APInt(1, 1));
     Known = KnownBits::computeForAddCarry(Known, Known2, One);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dffe69bdb900db..c5f906aa7473f7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23672,6 +23672,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
       return R;
     return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+  case AArch64ISD::BICi: {
+    KnownBits Known;
+    APInt DemandedElts(32, N->getValueType(0).getVectorNumElements());
+    APInt EltSize(32, N->getValueType(0).getScalarSizeInBits());
+    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                          !DCI.isBeforeLegalizeOps());
+    if (SimplifyDemandedBitsForTargetNode(SDValue(N, 0), EltSize, DemandedElts,
+                                          Known, TLO, 0)) {
+      return TLO.New;
+    }
+    break;
+  }
   case ISD::XOR:
     return performXorCombine(N, DAG, DCI, Subtarget);
   case ISD::MUL:
@@ -26658,6 +26670,18 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
     // used - simplify to just Val.
     return TLO.CombineTo(Op, ShiftR->getOperand(0));
   }
+  case AArch64ISD::BICi: {
+    // Fold BICi if all destination bits already known to be zeroed
+    SDValue Op0 = Op.getOperand(0);
+    KnownBits KnownOp0 = TLO.DAG.computeKnownBits(Op0, 0);
+    APInt Shift = Op.getConstantOperandAPInt(2);
+    APInt Op1Val = Op.getConstantOperandAPInt(1);
+    APInt BitsToClear = Op1Val.shl(Shift).zextOrTrunc(KnownOp0.getBitWidth());
+    APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
+    if (AlreadyZeroedBitsToClear == BitsToClear)
+      return TLO.CombineTo(Op, Op0);
+    return false;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     if (auto ElementSize = IsSVECntIntrinsic(Op)) {
       unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-bici.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-bici.ll
new file mode 100644
index 00000000000000..55ac9c2f1b4075
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-bici.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-neon < %s | FileCheck %s
+
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: haddu_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %x0 = zext <8 x i8> %a0 to <8 x i16>
+  %x1 = zext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: rhaddu_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %x0 = zext <8 x i8> %a0 to <8 x i16>
+  %x1 = zext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: hadds_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %x0 = zext <8 x i8> %a0 to <8 x i16>
+  %x1 = zext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: shaddu_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %x0 = zext <8 x i8> %a0 to <8 x i16>
+  %x1 = zext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+; ; negative tests
+
+define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: haddu_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    bic v0.8h, #254, lsl #8
+; CHECK-NEXT:    ret
+  %x0 = sext <8 x i8> %a0 to <8 x i16>
+  %x1 = sext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: urhadd_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    bic v0.8h, #254, lsl #8
+; CHECK-NEXT:    ret
+  %x0 = sext <8 x i8> %a0 to <8 x i16>
+  %x1 = sext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: hadds_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    bic v0.8h, #254, lsl #8
+; CHECK-NEXT:    ret
+  %x0 = sext <8 x i8> %a0 to <8 x i16>
+  %x1 = sext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
+; CHECK-LABEL: shaddu_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    bic v0.8h, #254, lsl #8
+; CHECK-NEXT:    ret
+  %x0 = sext <8 x i8> %a0 to <8 x i16>
+  %x1 = sext <8 x i8> %a1 to <8 x i16>
+  %hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
+  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  ret <8 x i16> %res
+}

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from 08f70b5 to 00fd477 on December 31, 2023 02:13
@snikitav snikitav changed the title from "Missing AArch64ISD::BIC & AArch64ISD::BICi handling" to "Missing AArch64ISD::BICi handling" on Dec 31, 2023
case AArch64ISD::BICi: {
KnownBits Known;
APInt DemandedElts(32, N->getValueType(0).getVectorNumElements());
APInt EltSize(32, N->getValueType(0).getScalarSizeInBits());
Collaborator

Why is this hardcoded to 32?

APInt EltSize(32, N->getValueType(0).getScalarSizeInBits());
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
if (SimplifyDemandedBitsForTargetNode(SDValue(N, 0), EltSize, DemandedElts,
Collaborator

Use TLI.SimplifyDemandedBits here
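A minimal sketch of what that could look like in the BICi combine, reusing the names from the surrounding PerformDAGCombine code (the exact SimplifyDemandedBits overload used in the final patch may differ):

case AArch64ISD::BICi: {
  APInt DemandedBits =
      APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
  APInt DemandedElts =
      APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
  // Drive the target hook through the generic demanded-bits machinery and
  // let the combiner commit whatever simplification it finds.
  if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
          SDValue(N, 0), DemandedBits, DemandedElts, DCI))
    return SDValue(N, 0);
  break;
}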

SDValue Op0 = Op.getOperand(0);
KnownBits KnownOp0 = TLO.DAG.computeKnownBits(Op0, 0);
APInt Shift = Op.getConstantOperandAPInt(2);
APInt Op1Val = Op.getConstantOperandAPInt(1);
Collaborator

Just use getConstantOperandVal for these?
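That suggestion would turn the two APInt reads into plain integer reads, roughly (a sketch with hypothetical local names):

uint64_t Imm = Op.getConstantOperandVal(1);   // 8-bit value the BIC clears
uint64_t Shift = Op.getConstantOperandVal(2); // left shift applied to it
uint64_t BitsToClear = Imm << Shift;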

APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
if (AlreadyZeroedBitsToClear == BitsToClear)
return TLO.CombineTo(Op, Op0);
return false;
Collaborator

Return the calculated KnownBits (copy the AArch64TargetLowering::computeKnownBitsForTargetNode implementation).
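For reference, the existing BICi handling in AArch64TargetLowering::computeKnownBitsForTargetNode that this refers to looks roughly like the following (paraphrased, so the exact in-tree form may differ slightly):

case AArch64ISD::BICi: {
  // Mask the first operand's known bits with the bits the immediate clears.
  uint64_t Mask =
      ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
  Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
  break;
}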

case AArch64ISD::BICi: {
// Fold BICi if all destination bits already known to be zeroed
SDValue Op0 = Op.getOperand(0);
KnownBits KnownOp0 = TLO.DAG.computeKnownBits(Op0, 0);
Collaborator

Use the computeKnownBits variant with OriginalDemandedElts

Don't reset the recursion depth - it must be Depth + 1
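Taken together, the two points amount to something like this for the Op0 query (a sketch; the parameter names follow the SimplifyDemandedBitsForTargetNode signature):

SDValue Op0 = Op.getOperand(0);
KnownBits KnownOp0 =
    TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);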

@RKSimon
Collaborator

RKSimon commented Feb 19, 2024

@snikitav reverse-ping. Are you still looking at this?

@snikitav
Contributor Author

@snikitav reverse-ping. Are you still looking at this?

Yes. Sorry, been distracted. Will come up with patch this weekend.

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch 3 times, most recently from c220a3e to 40ce707 on February 25, 2024 23:42
Collaborator

@RKSimon RKSimon left a comment

Ideally we need to separate the ISD::AVG knownbits handling into a followup patch - @davemgreen can you suggest any simpler tests that will create BICi nodes that the target node KnownBits handling could show?

Known = Known.zext(BitWidth + 1);
Known = (Opcode == ISD::AVGFLOORU || Opcode == ISD::AVGCEILU)
? Known.zext(BitWidth + 1)
: Known.sext(BitWidth + 1);
Known2 = Known2.zext(BitWidth + 1);
KnownBits One = KnownBits::makeConstant(APInt(1, 1));
Known = KnownBits::computeForAddCarry(Known, Known2, One);
Collaborator

Floor variants don't add the One:

bool IsCeil = Opcode == ISD::AVGCEILU || Opcode == ISD::AVGCEILS;
KnownBits Carry = KnownBits::makeConstant(APInt(1, IsCeil ? 1 : 0));
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);

declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
Collaborator

These need pre-committing to trunk so the patch shows the codegen diff

Contributor Author

Should I make a separate PR for this test?

Collaborator

Yeah that sounds good, if you don't have access to push them directly. Hopefully it is a simple one to get committed either way.

@RKSimon
Collaborator

RKSimon commented Feb 26, 2024

Also, please can you update the title of the patch to be more descriptive? Does this sound OK?

"[AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling"

@davemgreen
Collaborator

Ideally we need to separate the ISD::AVG knownbits handling into a followup patch - @davemgreen can you suggest any simpler tests that will create BICi nodes that the target node KnownBits handling could show?

I'm not sure. Considering where they are created it will be common for simple cases to already be optimized. Maybe they could be added to llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp?

I agree it would be good if there were separate prs for the BIC vs RADD parts.

@snikitav snikitav changed the title from "Missing AArch64ISD::BICi handling" to "[AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling" on Mar 5, 2024
@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch 2 times, most recently from 53709ee to f1f275e on March 5, 2024 22:27
@snikitav snikitav requested a review from RKSimon March 5, 2024 22:27
Collaborator

@RKSimon RKSimon left a comment

Please consult FoldValue in SelectionDAG.cpp which has the full evaluation for each of the AVG nodes

Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = Known.zext(BitWidth + 1);
Known = IsSigned ? Known.sext(BitWidth + 1) : Known.zext(BitWidth + 1);
Known2 = Known2.zext(BitWidth + 1);
Collaborator

Known2 = IsSigned ? Known2.sext(BitWidth + 1) : Known2.zext(BitWidth + 1);

Contributor Author

BTW, shouldn't we sext/zext(BitWidth + 2) for Ceil versions?

Collaborator

Good point - we don't do this in FoldValue either

Collaborator

Actually, this seems fine as we don't care about the MSB: https://alive2.llvm.org/ce/z/iQdZVc panic over

Contributor

BTW, shouldn't we sext/zext(BitWidth + 2) for Ceil versions?

It's not necessary even though the Ceil versions add 1 for carry. E.g. with 8 bits the maximum you could get would be 0xFF + 0xFF + 1 which is 0x1FF which still fits in 9 bits - you don't need 10 bits.

uint64_t BitsToClear = Op->getConstantOperandVal(1)
<< Op->getConstantOperandVal(2);
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
if (AlreadyZeroedBitsToClear == BitsToClear)
Collaborator

Use APInt::isSubsetOf ?
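That is, the "everything the BIC would clear is already known zero" check becomes a subset test, roughly (a sketch; BitsToClear is widened to an APInt of the element width so both sides have matching bit widths):

if (APInt(KnownOp0.getBitWidth(), BitsToClear).isSubsetOf(KnownOp0.Zero))
  return TLO.CombineTo(Op, Op0);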

@@ -27324,6 +27337,21 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
case AArch64ISD::BICi: {
// Fold BICi if all destination bits already known to be zeroed
Collaborator

It's always helpful to include in the comment what each of the immediate operands does, or to name them:

uint64_t foo = Op->getConstantOperandVal(1);
uint64_t bar = Op->getConstantOperandVal(2);
uint64_t BitsToClear = foo << bar;

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from f1f275e to 5a1ca5a on March 7, 2024 01:07
RKSimon pushed a commit that referenced this pull request Mar 8, 2024
Add neon bici test for haddu and shadd, prerequisite for #76644
@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from 5a1ca5a to afa5a78 on March 8, 2024 11:00
@snikitav snikitav requested a review from RKSimon March 8, 2024 11:02
@RKSimon
Collaborator

RKSimon commented Mar 8, 2024

Any luck with creating a test in AArch64SelectionDAGTest.cpp for the BICi known bits?

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from afa5a78 to 6ea8481 on March 11, 2024 22:47
@@ -796,4 +797,52 @@ TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_knownnegative) {
EXPECT_EQ(Known.One, APInt(32, 0xfffffff0));
}

TEST_F(AArch64SelectionDAGTest,
computeKnownBits_AVGFLOORU_AVGFLOORS_AVGCEILU_AVGCEILS) {
Collaborator

I meant add BICi test coverage here, no need for the AVG nodes - the idea being that this PR can be purely about BICi demanded/knownbits and then you can create a followup PR that deals with the AVG known bits support

Collaborator

Or these tests could be used to create a PR just for the AVG nodes part - to get that part in and reviewed, and the BIC can be rebase on top.

Collaborator

Either is fine for me - I just want to see the generic ISD::AVG knownbits patch separate from the AArch64 specific BICi patch.

Contributor Author

I'll do a separate patch then

Contributor Author

@snikitav snikitav Mar 27, 2024

@RKSimon @davemgreen #86754 PTAL

BTW, this particular PR is temporarily broken right now, because it relies on the ISD::AVG* computeKnownBits support that has not landed yet.

.isSubsetOf(AlreadyZeroedBitsToClear))
return TLO.CombineTo(Op, Op0);

Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));
Collaborator

Could this be Known = KnownOp0 & KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));? There is already some code for this in AArch64TargetLowering::computeKnownBitsForTargetNode; I would guess the two should match in terms of what they produce, but sometimes I misunderstand exactly what SimplifyDemandedBitsForTargetNode should be doing. I think this could be quite similar to the ISD::AND case inside SimplifyDemandedBits, just with an always-constant second argument.

@@ -796,4 +797,52 @@ TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_knownnegative) {
EXPECT_EQ(Known.One, APInt(32, 0xfffffff0));
}

TEST_F(AArch64SelectionDAGTest,
Collaborator

Thanks for adding these, they are useful for checking the produced results more precisely. Would it be possible to add something for the BICi changes too?

@snikitav
Contributor Author

Any luck with creating a test in AArch64SelectionDAGTest.cpp for the BICi known bits?

Are you sure? I don't see any use of the AArch64ISD namespace in AArch64SelectionDAGTest.cpp. I feel like I'm missing something.

RKSimon pushed a commit that referenced this pull request Apr 2, 2024
…86754)

knownBits calculation for **AVGFLOORU** / **AVGFLOORS** / **AVGCEILU** / **AVGCEILS** instructions

Prerequisite for #76644
@RKSimon
Collaborator

RKSimon commented Apr 4, 2024

@snikitav reverse-ping

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from c155e8f to f02f7b5 on April 5, 2024 20:04
@snikitav
Contributor Author

snikitav commented Apr 5, 2024

Any luck with creating a test in AArch64SelectionDAGTest.cpp for the BICi known bits?

@RKSimon Are you sure? I don't see any use of the AArch64ISD namespace in AArch64SelectionDAGTest.cpp. I feel like I'm missing something. Could you give me a bit of guidance here?

@RKSimon
Collaborator

RKSimon commented Apr 5, 2024

I don't think that will be necessary anymore unless @davemgreen disagrees?

Collaborator

@RKSimon RKSimon left a comment

LGTM

Collaborator

@davemgreen davemgreen left a comment

AArch64SelectionDAGTest.cpp

I see what you mean, with them being specified in AArch64ISelLowering.h. I think so long as this is tested and looks sensible it should be OK (the C++ tests just make it easier to write more comprehensive tests).

If you can remove the unused variable then this LGTM too.

@@ -24555,6 +24555,19 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case AArch64ISD::BICi: {
KnownBits Known;
Collaborator

Known isn't used?

case AArch64ISD::BICi: {
KnownBits Known;
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
Collaborator

I would expect this not to be all bits, but to only demand the bits that are not cleared by the BIC. Again, this might be difficult to test though, so it might be best to leave it as-is for the moment.
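Purely as an illustration of that idea (not what the patch does, and the operand meanings are assumed: operand 1 is the 8-bit value, operand 2 the left shift):

uint64_t ClearMask = N->getConstantOperandVal(1)
                     << N->getConstantOperandVal(2);
// Only ask for the bits the BIC keeps; the bits it clears are zero in the
// result no matter what the input operand looks like.
APInt DemandedBits =
    ~APInt(N->getValueType(0).getScalarSizeInBits(), ClearMask);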

@snikitav snikitav force-pushed the aarch64-implement-bic-handling branch from f02f7b5 to dc85ce6 on April 6, 2024 19:47
@snikitav snikitav requested a review from davemgreen April 6, 2024 19:47
Collaborator

@davemgreen davemgreen left a comment

Thanks. LGTM

@davemgreen davemgreen merged commit d38bff4 into llvm:main Apr 6, 2024
3 of 4 checks passed