summaryrefslogtreecommitdiffstats
path: root/gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp')
-rw-r--r--gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp352
1 files changed, 249 insertions, 103 deletions
diff --git a/gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 4edea7cc3c8..5ebfbf8a879 100644
--- a/gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -59,12 +59,14 @@
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
using namespace PatternMatch;
@@ -73,9 +75,16 @@ using namespace PatternMatch;
STATISTIC(NumCallSiteSplit, "Number of call-site split");
-static void addNonNullAttribute(Instruction *CallI, Instruction *NewCallI,
- Value *Op) {
- CallSite CS(NewCallI);
+/// Only allow instructions before a call, if their CodeSize cost is below
+/// DuplicationThreshold. Those instructions need to be duplicated in all
+/// split blocks.
+static cl::opt<unsigned>
+ DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden,
+ cl::desc("Only allow instructions before a call, if "
+ "their cost is below DuplicationThreshold"),
+ cl::init(5));
+
+static void addNonNullAttribute(CallSite CS, Value *Op) {
unsigned ArgNo = 0;
for (auto &I : CS.args()) {
if (&*I == Op)
@@ -84,13 +93,16 @@ static void addNonNullAttribute(Instruction *CallI, Instruction *NewCallI,
}
}
-static void setConstantInArgument(Instruction *CallI, Instruction *NewCallI,
- Value *Op, Constant *ConstValue) {
- CallSite CS(NewCallI);
+static void setConstantInArgument(CallSite CS, Value *Op,
+ Constant *ConstValue) {
unsigned ArgNo = 0;
for (auto &I : CS.args()) {
- if (&*I == Op)
+ if (&*I == Op) {
+ // It is possible we have already added the non-null attribute to the
+ // parameter by using an earlier constraining condition.
+ CS.removeParamAttr(ArgNo, Attribute::NonNull);
CS.setArgument(ArgNo, ConstValue);
+ }
++ArgNo;
}
}
@@ -111,11 +123,13 @@ static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallSite CS) {
return false;
}
+typedef std::pair<ICmpInst *, unsigned> ConditionTy;
+typedef SmallVector<ConditionTy, 2> ConditionsTy;
+
/// If From has a conditional jump to To, add the condition to Conditions,
/// if it is relevant to any argument at CS.
-static void
-recordCondition(const CallSite &CS, BasicBlock *From, BasicBlock *To,
- SmallVectorImpl<std::pair<ICmpInst *, unsigned>> &Conditions) {
+static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
+ ConditionsTy &Conditions) {
auto *BI = dyn_cast<BranchInst>(From->getTerminator());
if (!BI || !BI->isConditional())
return;
@@ -134,11 +148,10 @@ recordCondition(const CallSite &CS, BasicBlock *From, BasicBlock *To,
}
/// Record ICmp conditions relevant to any argument in CS following Pred's
-/// single successors. If there are conflicting conditions along a path, like
+/// single predecessors. If there are conflicting conditions along a path, like
/// x == 1 and x == 0, the first condition will be used.
-static void
-recordConditions(const CallSite &CS, BasicBlock *Pred,
- SmallVectorImpl<std::pair<ICmpInst *, unsigned>> &Conditions) {
+static void recordConditions(CallSite CS, BasicBlock *Pred,
+ ConditionsTy &Conditions) {
recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
BasicBlock *From = Pred;
BasicBlock *To = Pred;
@@ -151,24 +164,17 @@ recordConditions(const CallSite &CS, BasicBlock *Pred,
}
}
-static Instruction *
-addConditions(CallSite &CS,
- SmallVectorImpl<std::pair<ICmpInst *, unsigned>> &Conditions) {
- if (Conditions.empty())
- return nullptr;
-
- Instruction *NewCI = CS.getInstruction()->clone();
+static void addConditions(CallSite CS, const ConditionsTy &Conditions) {
for (auto &Cond : Conditions) {
Value *Arg = Cond.first->getOperand(0);
Constant *ConstVal = cast<Constant>(Cond.first->getOperand(1));
if (Cond.second == ICmpInst::ICMP_EQ)
- setConstantInArgument(CS.getInstruction(), NewCI, Arg, ConstVal);
+ setConstantInArgument(CS, Arg, ConstVal);
else if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) {
assert(Cond.second == ICmpInst::ICMP_NE);
- addNonNullAttribute(CS.getInstruction(), NewCI, Arg);
+ addNonNullAttribute(CS, Arg);
}
}
- return NewCI;
}
static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
@@ -177,38 +183,90 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
return Preds;
}
-static bool canSplitCallSite(CallSite CS) {
+static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
// without too much effort.
Instruction *Instr = CS.getInstruction();
if (!isa<CallInst>(Instr))
return false;
- // Allow splitting a call-site only when there is no instruction before the
- // call-site in the basic block. Based on this constraint, we only clone the
- // call instruction, and we do not move a call-site across any other
- // instruction.
BasicBlock *CallSiteBB = Instr->getParent();
- if (Instr != CallSiteBB->getFirstNonPHIOrDbg())
- return false;
-
// Need 2 predecessors and cannot split an edge from an IndirectBrInst.
SmallVector<BasicBlock *, 2> Preds(predecessors(CallSiteBB));
if (Preds.size() != 2 || isa<IndirectBrInst>(Preds[0]->getTerminator()) ||
isa<IndirectBrInst>(Preds[1]->getTerminator()))
return false;
- return CallSiteBB->canSplitPredecessors();
+ // BasicBlock::canSplitPredecessors is more aggressive, so checking for
+ // BasicBlock::isEHPad as well.
+ if (!CallSiteBB->canSplitPredecessors() || CallSiteBB->isEHPad())
+ return false;
+
+ // Allow splitting a call-site only when the CodeSize cost of the
+ // instructions before the call is less than DuplicationThreshold. The
+ // instructions before the call will be duplicated in the split blocks and
+ // corresponding uses will be updated.
+ unsigned Cost = 0;
+ for (auto &InstBeforeCall :
+ llvm::make_range(CallSiteBB->begin(), Instr->getIterator())) {
+ Cost += TTI.getInstructionCost(&InstBeforeCall,
+ TargetTransformInfo::TCK_CodeSize);
+ if (Cost >= DuplicationThreshold)
+ return false;
+ }
+
+ return true;
+}
+
+static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before,
+ Value *V) {
+ Instruction *Copy = I->clone();
+ Copy->setName(I->getName());
+ Copy->insertBefore(Before);
+ if (V)
+ Copy->setOperand(0, V);
+ return Copy;
+}
+
+/// Copy mandatory `musttail` return sequence that follows original `CI`, and
+/// link it up to `NewCI` value instead:
+///
+/// * (optional) `bitcast NewCI to ...`
+/// * `ret bitcast or NewCI`
+///
+/// Insert this sequence right before `SplitBB`'s terminator, which will be
+/// cleaned up later in `splitCallSite` below.
+static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
+ Instruction *NewCI) {
+ bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy();
+ auto II = std::next(CI->getIterator());
+
+ BitCastInst* BCI = dyn_cast<BitCastInst>(&*II);
+ if (BCI)
+ ++II;
+
+ ReturnInst* RI = dyn_cast<ReturnInst>(&*II);
+ assert(RI && "`musttail` call must be followed by `ret` instruction");
+
+ TerminatorInst *TI = SplitBB->getTerminator();
+ Value *V = NewCI;
+ if (BCI)
+ V = cloneInstForMustTail(BCI, TI, V);
+ cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V);
+
+ // FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug
+ // that prevents doing this now.
}
-/// Return true if the CS is split into its new predecessors which are directly
-/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
-/// CallInst1 and CallInst2 will be the new call-sites placed in the new
-/// predecessors split for PredBB1 and PredBB2, respectively.
+/// For each (predecessor, conditions from predecessors) pair, it will split the
+/// basic block containing the call site, hook it up to the predecessor and
+/// replace the call instruction with new call instructions, which contain
+/// constraints based on the conditions from their predecessors.
/// For example, in the IR below with an OR condition, the call-site can
-/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the
-/// call-site placed between Header and Tail, and CallInst2 will be the
-/// call-site between TBB and Tail.
+/// be split. In this case, Preds for Tail is [(Header, a == null),
+/// (TBB, a != null, b == null)]. Tail is replaced by 2 split blocks, containing
+/// CallInst1, which has constraints based on the conditions from Header and
+/// CallInst2, which has constraints based on the conditions coming from TBB.
///
/// From :
///
@@ -241,57 +299,112 @@ static bool canSplitCallSite(CallSite CS) {
/// Note that in case any arguments at the call-site are constrained by its
/// predecessors, new call-sites with more constrained arguments will be
/// created in createCallSitesOnPredicatedArgument().
-static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
- Instruction *CallInst1, Instruction *CallInst2) {
+static void splitCallSite(
+ CallSite CS,
+ const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
+ DominatorTree *DT) {
Instruction *Instr = CS.getInstruction();
BasicBlock *TailBB = Instr->getParent();
- assert(Instr == (TailBB->getFirstNonPHIOrDbg()) && "Unexpected call-site");
-
- BasicBlock *SplitBlock1 =
- SplitBlockPredecessors(TailBB, PredBB1, ".predBB1.split");
- BasicBlock *SplitBlock2 =
- SplitBlockPredecessors(TailBB, PredBB2, ".predBB2.split");
-
- assert((SplitBlock1 && SplitBlock2) && "Unexpected new basic block split.");
-
- if (!CallInst1)
- CallInst1 = Instr->clone();
- if (!CallInst2)
- CallInst2 = Instr->clone();
-
- CallInst1->insertBefore(&*SplitBlock1->getFirstInsertionPt());
- CallInst2->insertBefore(&*SplitBlock2->getFirstInsertionPt());
-
- CallSite CS1(CallInst1);
- CallSite CS2(CallInst2);
-
- // Handle PHIs used as arguments in the call-site.
- for (PHINode &PN : TailBB->phis()) {
- unsigned ArgNo = 0;
- for (auto &CI : CS.args()) {
- if (&*CI == &PN) {
- CS1.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock1));
- CS2.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock2));
+ bool IsMustTailCall = CS.isMustTailCall();
+
+ PHINode *CallPN = nullptr;
+
+ // `musttail` calls must be followed by optional `bitcast`, and `ret`. The
+ // split blocks will be terminated right after that so there're no users for
+ // this phi in a `TailBB`.
+ if (!IsMustTailCall && !Instr->use_empty())
+ CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");
+
+ LLVM_DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
+
+ assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2.");
+ // ValueToValueMapTy is neither copy nor moveable, so we use a simple array
+ // here.
+ ValueToValueMapTy ValueToValueMaps[2];
+ for (unsigned i = 0; i < Preds.size(); i++) {
+ BasicBlock *PredBB = Preds[i].first;
+ BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
+ TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
+ DT);
+ assert(SplitBlock && "Unexpected new basic block split.");
+
+ Instruction *NewCI =
+ &*std::prev(SplitBlock->getTerminator()->getIterator());
+ CallSite NewCS(NewCI);
+ addConditions(NewCS, Preds[i].second);
+
+ // Handle PHIs used as arguments in the call-site.
+ for (PHINode &PN : TailBB->phis()) {
+ unsigned ArgNo = 0;
+ for (auto &CI : CS.args()) {
+ if (&*CI == &PN) {
+ NewCS.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock));
+ }
+ ++ArgNo;
}
- ++ArgNo;
}
+ LLVM_DEBUG(dbgs() << " " << *NewCI << " in " << SplitBlock->getName()
+ << "\n");
+ if (CallPN)
+ CallPN->addIncoming(NewCI, SplitBlock);
+
+ // Clone and place bitcast and return instructions before `TI`
+ if (IsMustTailCall)
+ copyMustTailReturn(SplitBlock, Instr, NewCI);
}
+ NumCallSiteSplit++;
+
+ // FIXME: remove TI in `copyMustTailReturn`
+ if (IsMustTailCall) {
+ // Remove superfluous `br` terminators from the end of the Split blocks
+ // NOTE: Removing terminator removes the SplitBlock from the TailBB's
+ // predecessors. Therefore we must get complete list of Splits before
+ // attempting removal.
+ SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
+ assert(Splits.size() == 2 && "Expected exactly 2 splits!");
+ for (unsigned i = 0; i < Splits.size(); i++)
+ Splits[i]->getTerminator()->eraseFromParent();
+
+ // Erase the tail block once done with musttail patching
+ TailBB->eraseFromParent();
+ return;
+ }
+
+ auto *OriginalBegin = &*TailBB->begin();
// Replace users of the original call with a PHI mering call-sites split.
- if (Instr->getNumUses()) {
- PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call",
- TailBB->getFirstNonPHI());
- PN->addIncoming(CallInst1, SplitBlock1);
- PN->addIncoming(CallInst2, SplitBlock2);
- Instr->replaceAllUsesWith(PN);
+ if (CallPN) {
+ CallPN->insertBefore(OriginalBegin);
+ Instr->replaceAllUsesWith(CallPN);
+ }
+
+ // Remove instructions moved to split blocks from TailBB, from the duplicated
+ // call instruction to the beginning of the basic block. If an instruction
+ // has any uses, add a new PHI node to combine the values coming from the
+ // split blocks. The new PHI nodes are placed before the first original
+ // instruction, so we do not end up deleting them. By using reverse-order, we
+ // do not introduce unnecessary PHI nodes for def-use chains from the call
+ // instruction to the beginning of the block.
+ auto I = Instr->getReverseIterator();
+ while (I != TailBB->rend()) {
+ Instruction *CurrentI = &*I++;
+ if (!CurrentI->use_empty()) {
+ // If an existing PHI has users after the call, there is no need to create
+ // a new one.
+ if (isa<PHINode>(CurrentI))
+ continue;
+ PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
+ for (auto &Mapping : ValueToValueMaps)
+ NewPN->addIncoming(Mapping[CurrentI],
+ cast<Instruction>(Mapping[CurrentI])->getParent());
+ NewPN->insertBefore(&*TailBB->begin());
+ CurrentI->replaceAllUsesWith(NewPN);
+ }
+ CurrentI->eraseFromParent();
+ // We are done once we handled the first original instruction in TailBB.
+ if (CurrentI == OriginalBegin)
+ break;
}
- DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
- DEBUG(dbgs() << " " << *CallInst1 << " in " << SplitBlock1->getName()
- << "\n");
- DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName()
- << "\n");
- Instr->eraseFromParent();
- NumCallSiteSplit++;
}
// Return true if the call-site has an argument which is a PHI with only
@@ -322,45 +435,59 @@ static bool isPredicatedOnPHI(CallSite CS) {
return false;
}
-static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
if (!isPredicatedOnPHI(CS))
return false;
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
- splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr);
+ SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS = {
+ {Preds[0], {}}, {Preds[1], {}}};
+ splitCallSite(CS, PredsCS, DT);
return true;
}
-static bool tryToSplitOnPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
if (Preds[0] == Preds[1])
return false;
- SmallVector<std::pair<ICmpInst *, unsigned>, 2> C1, C2;
- recordConditions(CS, Preds[0], C1);
- recordConditions(CS, Preds[1], C2);
+ SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
+ for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
+ ConditionsTy Conditions;
+ recordConditions(CS, Pred, Conditions);
+ PredsCS.push_back({Pred, Conditions});
+ }
- Instruction *CallInst1 = addConditions(CS, C1);
- Instruction *CallInst2 = addConditions(CS, C2);
- if (!CallInst1 && !CallInst2)
+ if (std::all_of(PredsCS.begin(), PredsCS.end(),
+ [](const std::pair<BasicBlock *, ConditionsTy> &P) {
+ return P.second.empty();
+ }))
return false;
- splitCallSite(CS, Preds[1], Preds[0], CallInst2, CallInst1);
+ splitCallSite(CS, PredsCS, DT);
return true;
}
-static bool tryToSplitCallSite(CallSite CS) {
- if (!CS.arg_size() || !canSplitCallSite(CS))
+static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
+ DominatorTree *DT) {
+ if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
- return tryToSplitOnPredicatedArgument(CS) ||
- tryToSplitOnPHIPredicatedArgument(CS);
+ return tryToSplitOnPredicatedArgument(CS, DT) ||
+ tryToSplitOnPHIPredicatedArgument(CS, DT);
}
-static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
+static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
+ TargetTransformInfo &TTI, DominatorTree *DT) {
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
- for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+ auto II = BB.getFirstNonPHIOrDbg()->getIterator();
+ auto IE = BB.getTerminator()->getIterator();
+ // Iterate until we reach the terminator instruction. tryToSplitCallSite
+ // can replace BB's terminator in case BB is a successor of itself. In that
+ // case, IE will be invalidated and we also have to check the current
+ // terminator.
+ while (II != IE && &*II != BB.getTerminator()) {
Instruction *I = &*II++;
CallSite CS(cast<Value>(I));
if (!CS || isa<IntrinsicInst>(I) || isInstructionTriviallyDead(I, &TLI))
@@ -369,7 +496,17 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
Function *Callee = CS.getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
- Changed |= tryToSplitCallSite(CS);
+
+ // Successful musttail call-site splits result in erased CI and erased BB.
+ // Check if such path is possible before attempting the splitting.
+ bool IsMustTail = CS.isMustTailCall();
+
+ Changed |= tryToSplitCallSite(CS, TTI, DT);
+
+ // There're no interesting instructions after this. The call site
+ // itself might have been erased on splitting.
+ if (IsMustTail)
+ break;
}
}
return Changed;
@@ -384,6 +521,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -392,7 +531,10 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
return false;
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return doCallSiteSplitting(F, TLI);
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ return doCallSiteSplitting(F, TLI, TTI,
+ DTWP ? &DTWP->getDomTree() : nullptr);
}
};
} // namespace
@@ -401,6 +543,7 @@ char CallSiteSplittingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -410,9 +553,12 @@ FunctionPass *llvm::createCallSiteSplittingPass() {
PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
- if (!doCallSiteSplitting(F, TLI))
+ if (!doCallSiteSplitting(F, TLI, TTI, DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}