Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FuncSpec] Update function specialization to handle phi-chains #71442

Closed
wants to merge 8 commits into from
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);

// Recursively collects into PhiNodes the PHI nodes that can be reached from
// PN by following incoming values that are themselves PHI nodes. Depth is the
// recursion depth of the current call.
void discoverTransitivelyIncomingValues(DenseSet<PHINode *> &PhiNodes,
PHINode *PN, unsigned Depth);

Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitPHINode(PHINode &I);
Constant *visitFreezeInst(FreezeInst &I);
Expand Down
214 changes: 190 additions & 24 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));

// Hidden command-line knob (default 10) that bounds how deep the recursive
// PHI discovery in InstCostVisitor::discoverTransitivelyIncomingValues may go;
// a call whose depth exceeds this value returns without exploring further.
static cl::opt<unsigned> MaxDiscoveryDepth(
"funcspec-max-discovery-depth", cl::init(10), cl::Hidden,
cl::desc("The maximum recursion depth allowed when searching for strongly "
"connected phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
"The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
cl::desc("The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
Expand All @@ -64,9 +69,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
"much percent of the original function size"));

// Hidden command-line knob (default 40): the minimum latency saving, given as
// a percentage of the original function size, that a specialization has to
// reach. The profitability check rejects a candidate whose latency bonus is
// below MinLatencySavings * FuncSize / 100.
//
// The two description literals are concatenated by the compiler, so the first
// one must end with a space; otherwise the help text reads "thismuch".
static cl::opt<unsigned> MinLatencySavings(
    "funcspec-min-latency-savings", cl::init(40), cl::Hidden,
    cl::desc("Reject specializations whose latency savings are less than this "
             "much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
Expand Down Expand Up @@ -262,30 +267,170 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
return estimateBasicBlocks(WorkList);
}

// Gathers, into PHINodes, the PHI nodes that are reachable from PN by
// repeatedly following incoming values that are themselves PHI nodes. When
// such a chain or graph of PHI nodes only links to itself, a constant entering
// it makes every node in it that same constant.
//
// This walk only collects candidates. The caller is expected to check
// afterwards that no other, non-constant, values are involved, and to discard
// the candidate set from the possible conversions if there are.
//
// For example:
//
//   %a = load %0
//   %c = phi [%a, %d]
//   %d = phi [%e, %c]
//   %e = phi [%c, %f]
//   %f = phi [%j, %h]
//   %j = phi [%h, %j]
//   %h = phi [%g, %c]
//
// Only the PHI nodes are shown here, not the branches that choose between
// the different paths.
//
// Two limits keep the search cheap: MaxDiscoveryDepth caps the recursion
// depth, and a PHI with more than MaxIncomingPhiValues incoming values is not
// explored. A PHI rejected by either limit is not added to PHINodes.
void InstCostVisitor::discoverTransitivelyIncomingValues(
    DenseSet<PHINode *> &PHINodes, PHINode *PN, unsigned Depth) {
  // Guard 1: recursion budget exhausted.
  if (Depth > MaxDiscoveryDepth) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Discover PHI nodes too deep ("
                      << Depth << ">" << MaxDiscoveryDepth << ")\n");
    return;
  }

  // Guard 2: this PHI is too wide to be worth exploring.
  const unsigned NumIncoming = PN->getNumIncomingValues();
  if (NumIncoming > MaxIncomingPhiValues) {
    LLVM_DEBUG(
        dbgs() << "FnSpecialization: Discover PHI nodes has too many values ("
               << NumIncoming << ">" << MaxIncomingPhiValues << ")\n");
    return;
  }

  // Guard 3: a PHI that is already in the set has been expanded before.
  const bool FirstVisit = PHINodes.insert(PN).second;
  if (!FirstVisit)
    return;

  for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
    auto *IncomingPhi = dyn_cast<PHINode>(PN->getIncomingValue(Idx));
    // Only PHI-typed inputs extend the walk; everything else is ignored here.
    if (!IncomingPhi)
      continue;

    // Skip self-references and values arriving from a block in DeadBlocks.
    const bool IsSelfReference = IncomingPhi == PN;
    if (IsSelfReference || DeadBlocks.contains(PN->getIncomingBlock(Idx)))
      continue;

    discoverTransitivelyIncomingValues(PHINodes, IncomingPhi, Depth + 1);
  }
}

Constant *InstCostVisitor::visitPHINode(PHINode &I) {
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
return nullptr;

// PHI nodes
DenseSet<PHINode *> TransitivePHIs;

bool Inserted = VisitedPHIs.insert(&I).second;
Constant *Const = nullptr;
SmallVector<PHINode *, 8> UnknownIncomingValues;

auto canConstantFoldPhiTrivially = [&](PHINode *PN) -> Constant * {
Constant *Const = nullptr;

UnknownIncomingValues.clear();
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
Value *V = I.getIncomingValue(Idx);
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
Leporacanthicus marked this conversation as resolved.
Show resolved Hide resolved
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return nullptr;
continue;
Constant *C = findConstantFor(V, KnownConstants);
if (!C) {
if (Inserted)
PendingPHIs.push_back(&I);
}
if (auto *Phi = dyn_cast<PHINode>(V)) {
UnknownIncomingValues.push_back(Phi);
continue;
}

// We can't reason about anything else.
return nullptr;
}
if (!Const)
Const = C;
else if (C != Const)
return UnknownIncomingValues.empty() ? Const : nullptr;
};

if (Constant *Const = canConstantFoldPhiTrivially(&I))
return Const;
Leporacanthicus marked this conversation as resolved.
Show resolved Hide resolved

if (Inserted) {
// First time we are seeing this phi. We'll retry later, after all
// the constant arguments have been propagated. Bail for now.
PendingPHIs.push_back(&I);
return nullptr;
}

// Try to see if we can collect a nest of transitive phis.
for (PHINode *Phi : UnknownIncomingValues)
discoverTransitivelyIncomingValues(TransitivePHIs, Phi, 1);

// A nested set of PHINodes can be constantfolded if:
// - It has a constant input.
// - It is always the SAME constant.
// - All the nodes are part of the nest, or a constant.
// Later we will check that the constant is always the same one.
auto canConstantFoldNestedPhi = [&](PHINode *PN, Constant *&Const) -> bool {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);
// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;

// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return false;
continue;
}
if (auto *Phi = dyn_cast<PHINode>(V)) {
// It's not a Transitive phi. Bail out.
if (!TransitivePHIs.contains(Phi))
return false;
continue;
}

// We can't reason about anything else.
return false;
}
return true;
};

// All TransitivePHIs have to be the SAME constant.
Constant *Retval = nullptr;
for (PHINode *Phi : TransitivePHIs) {
Constant *Const = nullptr;
if (canConstantFoldNestedPhi(Phi, Const)) {
if (Const) {
if (!Retval) {
Retval = Const;
continue;
}
// Found more than one constant, can't fold.
if (Retval != Const)
return nullptr;
}
}
// Found something "wrong", can't fold.
else
return nullptr;
}
return Const;

return Retval;
}

Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
Expand Down Expand Up @@ -809,20 +954,41 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
unsigned FuncGrowth) -> bool {
// No check required.
if (ForceSpecialization)
if (ForceSpecialization) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Force is on\n");
return true;
}
// Minimum inlining bonus.
if (Score > MinInliningBonus * FuncSize / 100)
if (Score > MinInliningBonus * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Sufficient inlining bonus (" << Score
<< " > " << MinInliningBonus * FuncSize / 100 << ")\n");
return true;
}
// Minimum codesize savings.
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Insufficient CodeSize Savings ("
<< B.CodeSize << " < "
<< MinCodeSizeSavings * FuncSize / 100 << ")\n");
return false;
}
// Minimum latency savings.
if (B.Latency < MinLatencySavings * FuncSize / 100)
if (B.Latency < MinLatencySavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Insufficient Latency Savings ("
<< B.Latency << " < " << MinLatencySavings * FuncSize / 100
<< ")\n");
return false;
}
// Maximum codesize growth.
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Function Growth exceeds threshold ("
<< FuncGrowth / FuncSize << " > " << MaxCodeSizeGrowth
<< ")\n");
return false;
}
return true;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-depth=5 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC

; Caller: invokes @foo twice, with the literal constants 3 and 4 as the first
; argument, forwards the ten i1 flags unchanged and returns the sum of the two
; results. The first RUN line expects both calls to be redirected to
; specialized clones of @foo; the second RUN line, which lowers
; -funcspec-max-discovery-depth to 5, expects both calls to keep targeting @foo.
define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
; FUNCSPEC-LABEL: define i64 @bar(
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; FUNCSPEC-NEXT: entry:
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; FUNCSPEC-NEXT: ret i64 [[ADD]]
;
; NOFUNCSPEC-LABEL: define i64 @bar(
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; NOFUNCSPEC-NEXT: entry:
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%add = add i64 %f1, %f2
ret i64 %add
}

; Callee under test. The argument %n enters the function body only through
; %phi1 and %phi9; every other incoming value of %phi1 .. %phi9 is another phi
; from the same group, so the nine phis form a mutually referencing web whose
; single non-phi input is %n. The i1 arguments only steer the branches between
; the blocks that hold those phis.
define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
entry:
br i1 %c1, label %l1, label %l9

; First consumer of the phi web: arithmetic on %phi1.
l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l1_75, label %l6

l1_75:
br i1 %c4, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
br label %l2

l4:
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
br i1 %c5, label %l3, label %l6

l5:
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br label %l4

l6:
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
br i1 %c6, label %l4, label %l6_5

l6_5:
br i1 %c7, label %l5, label %l8

l7:
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
br i1 %c8, label %l5, label %l8

l8:
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br i1 %c9, label %l7, label %l9

; Second consumer of the phi web: arithmetic on %phi9.
l9:
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
%sub = sub i64 %phi9, 1
%mul = mul i64 %sub, 2
br i1 %c10, label %l7, label %exit

; The result is whichever of the two computed values reached the exit.
exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
ret i64 %res
}