GlobalISel: Lower funnel shifts
arsenm committed Mar 23, 2021
1 parent 5949bd9 commit b24436a
Showing 10 changed files with 17,927 additions and 75 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -345,6 +345,9 @@ class LegalizerHelper {
LegalizeResult lowerLoad(MachineInstr &MI);
LegalizeResult lowerStore(MachineInstr &MI);
LegalizeResult lowerBitCount(MachineInstr &MI);
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
LegalizeResult lowerFunnelShift(MachineInstr &MI);

LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
LegalizeResult lowerUITOFP(MachineInstr &MI);
7 changes: 7 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1444,6 +1444,13 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
}

/// Build and insert \p Dst = G_UREM \p Src0, \p Src1
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags);
}

MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
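
The new helper mirrors the neighboring buildSMulH/buildUMulH wrappers. The funnel-shift lowering added in this commit uses it to reduce the shift amount modulo the bit width; a minimal call sketch (names as in the LegalizerHelper.cpp hunk below):

// Sketch: compute Z % BW as a G_UREM, where ShTy is the LLT of the
// shift amount Z.
auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
Register ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
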
7 changes: 7 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -328,6 +328,13 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);

/// Attempt to match a unary predicate against a scalar/splat constant or every
/// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source
/// value was undef.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match,
bool AllowUndefs = false);

/// Returns true if given the TargetLowering's boolean contents information,
/// the value \p Val contains a true value.
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
129 changes: 129 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3210,6 +3210,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_SDIVREM:
case G_UDIVREM:
return lowerDIVREM(MI);
case G_FSHL:
case G_FSHR:
return lowerFunnelShift(MI);
}
}

@@ -5207,6 +5210,132 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
}
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
Register Reg, unsigned BW) {
return matchUnaryPredicate(
MRI, Reg,
[=](const Constant *C) {
// Null constant here means an undef.
const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
return !CI || CI->getValue().urem(BW) != 0;
},
/*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register X = MI.getOperand(1).getReg();
Register Y = MI.getOperand(2).getReg();
Register Z = MI.getOperand(3).getReg();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);

unsigned BW = Ty.getScalarSizeInBits();
const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
// fshl X, Y, Z -> fshr X, Y, -Z
// fshr X, Y, Z -> fshl X, Y, -Z
auto Zero = MIRBuilder.buildConstant(ShTy, 0);
Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
} else {
// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
auto One = MIRBuilder.buildConstant(ShTy, 1);
if (IsFSHL) {
Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
} else {
X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
}

Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
}

MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
MI.eraseFromParent();
return Legalized;
}
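
// Worked example of the inverse rewrite above (annotation, not part of the
// commit): for BW = 8 and Z = 3, -Z is 5 modulo 8, and
//   fshl X, Y, 3 = (X << 3) | (Y >> 5)
//   fshr X, Y, 5 = (X << (8 - 5)) | (Y >> 5) = (X << 3) | (Y >> 5)
// so fshl X, Y, 3 == fshr X, Y, -3. At Z % BW == 0 the identity fails
// (fshl yields X, fshr yields Y), which is why that case pre-shifts the
// operands by one and uses ~Z instead.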

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register X = MI.getOperand(1).getReg();
Register Y = MI.getOperand(2).getReg();
Register Z = MI.getOperand(3).getReg();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);

const unsigned BW = Ty.getScalarSizeInBits();
const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

Register ShX, ShY;
Register ShAmt, InvShAmt;

// FIXME: Emit optimized urem by constant instead of letting it expand later.
if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
// fshl: X << C | Y >> (BW - C)
// fshr: X << (BW - C) | Y >> C
// where C = Z % BW is not zero
auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
} else {
// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
if (isPowerOf2_32(BW)) {
// Z % BW -> Z & (BW - 1)
ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
auto NotZ = MIRBuilder.buildNot(ShTy, Z);
InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
} else {
auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
}

auto One = MIRBuilder.buildConstant(ShTy, 1);
if (IsFSHL) {
ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
} else {
auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
}
}

MIRBuilder.buildOr(Dst, ShX, ShY);
MI.eraseFromParent();
return Legalized;
}
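
// Worked example for the expansion above (annotation, not part of the
// commit): with BW = 8 and G_FSHL, the result is
//   (X << C) | ((Y >> 1) >> (7 - C))   for C = Z % 8.
// Splitting Y's shift as ">> 1 >> (7 - C)" keeps both amounts in [0, 7]
// even when C == 0, where the single shift "Y >> (8 - C)" would shift by
// the full bit width (an out-of-range shift). At C == 0 this folds to
// X | ((Y >> 1) >> 7) == X | 0 == X, matching the fshl semantics.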

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
// These operations approximately do the following (while avoiding undefined
// shifts by BW):
// G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
return lowerFunnelShiftAsShifts(MI);
return lowerFunnelShiftWithInverse(MI);
}
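
// Note on the strategy choice above (annotation, not part of the commit):
// rewriting to the reverse opcode only pays off if the target can do
// something better with it, e.g. select it directly; if the reverse opcode
// would itself just be lowered, expanding straight to shifts avoids a
// pointless round trip through legalization.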

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
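
Both rewrites can be sanity-checked outside the compiler. A minimal standalone harness (a sketch, not part of the commit) encodes the generic funnel-shift semantics for a 32-bit type and asserts the two identities the lowering relies on:

#include <cassert>
#include <cstdint>

static const unsigned BW = 32;

// Reference semantics of G_FSHL/G_FSHR: the shift amount is taken modulo
// the bit width; a zero amount returns X (fshl) or Y (fshr) unchanged.
static uint32_t RefFshl(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned C = Z % BW;
  return C ? (X << C) | (Y >> (BW - C)) : X;
}

static uint32_t RefFshr(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned C = Z % BW;
  return C ? (X << (BW - C)) | (Y >> C) : Y;
}

int main() {
  const uint32_t X = 0xDEADBEEF, Y = 0x01234567;
  for (uint32_t Z = 0; Z < 2 * BW; ++Z) {
    if (Z % BW != 0) {
      // Inverse rewrite: valid only when Z is nonzero modulo BW.
      assert(RefFshl(X, Y, Z) == RefFshr(X, Y, 0u - Z));
      assert(RefFshr(X, Y, Z) == RefFshl(X, Y, 0u - Z));
    }
    // Pre-shift-by-one rewrite with ~Z: valid for any Z.
    assert(RefFshl(X, Y, Z) == RefFshr(X >> 1, RefFshr(X, Y, 1), ~Z));
    assert(RefFshr(X, Y, Z) == RefFshl(RefFshl(X, Y, 1), Y << 1, ~Z));
  }
  return 0;
}
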
32 changes: 32 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -926,6 +926,38 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}

bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {

const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
return Match(nullptr);

// TODO: Also handle fconstant
if (Def->getOpcode() == TargetOpcode::G_CONSTANT)
return Match(Def->getOperand(1).getCImm());

if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
return false;

for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
Register SrcElt = Def->getOperand(I).getReg();
const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
if (!Match(nullptr))
return false;
continue;
}

if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT ||
!Match(SrcDef->getOperand(1).getCImm()))
return false;
}

return true;
}

bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
bool IsFP) {
switch (TLI.getBooleanContents(IsVector, IsFP)) {
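
For illustration, a hypothetical caller (a sketch, not from the commit) showing the shape of the new API: testing whether a register holds a constant or splat power of two while accepting undef elements:

// Hypothetical usage sketch for matchUnaryPredicate.
bool IsPow2OrUndef = matchUnaryPredicate(
    MRI, Reg,
    [](const Constant *C) {
      // Null means an undef (sub)value; accept it here.
      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
      return !CI || CI->getValue().isPowerOf2();
    },
    /*AllowUndefs=*/true);
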
19 changes: 16 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1595,11 +1595,26 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S32, S64)
.lower();

// TODO: Only try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
.lowerFor({{V2S16, V2S16}})
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
.scalarize(0)
.lower();

if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_FSHL)
.lowerFor({{V2S16, V2S16}})
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
.scalarize(0)
.lower();
} else {
getActionDefinitionsBuilder(G_FSHL)
.scalarize(0)
.lower();
}

getActionDefinitionsBuilder(G_READCYCLECOUNTER)
.legalFor({S64});

@@ -1624,9 +1639,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_SADDO, G_SSUBO,

// TODO: Implement
-    G_FMINIMUM, G_FMAXIMUM,
-    G_FSHL
-  }).lower();
+    G_FMINIMUM, G_FMAXIMUM}).lower();

getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
