path: root/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
author     patrick <patrick@openbsd.org>  2017-01-14 19:55:43 +0000
committer  patrick <patrick@openbsd.org>  2017-01-14 19:55:43 +0000
commit     bd3306aecb3a15e8967143b8cdbbccf2b1b19b74 (patch)
tree       309a8132b44564b9e634c0da6815187ce8eab27c /gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
parent     killp -a should not kill the window if only one pane. (diff)
Import LLVM 3.9.1 including clang and lld.
Diffstat (limited to 'gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  115
1 file changed, 52 insertions(+), 63 deletions(-)
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index acfdec43d21..70bd9d3cca9 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -684,10 +684,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
@@ -704,18 +700,6 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- AccessTy.MemTy = II->getArgOperand(0)->getType();
- break;
- }
}
// All pointers have the same requirements, so canonicalize them to an
@@ -963,8 +947,8 @@ void Cost::RateRegister(const SCEV *Reg,
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
- NumIVMuls += isa<SCEVMulExpr>(Reg) &&
- SE.hasComputableLoopEvolution(Reg, L);
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
}
/// Record this register in the set. If we haven't seen it before, rate
@@ -2752,34 +2736,31 @@ void LSRInstance::CollectChains() {
LatchPath.push_back(LoopHeader);
// Walk the instruction stream from the loop header to the loop latch.
- for (SmallVectorImpl<BasicBlock *>::reverse_iterator
- BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
- BBIter != BBEnd; ++BBIter) {
- for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
- I != E; ++I) {
+ for (BasicBlock *BB : reverse(LatchPath)) {
+ for (Instruction &I : *BB) {
// Skip instructions that weren't seen by IVUsers analysis.
- if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
- if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
+ if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
- ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
+ ChainUsersVec[ChainIdx].NearUsers.erase(&I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
- User::op_iterator IVOpEnd = I->op_end();
- User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ User::op_iterator IVOpEnd = I.op_end();
+ User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
- ChainInstruction(&*I, IVOpInst, ChainUsersVec);
+ ChainInstruction(&I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
@@ -4331,28 +4312,15 @@ BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
+ Instruction *Tentative = &*IP;
for (;;) {
- const Loop *IPLoop = LI.getLoopFor(IP->getParent());
- unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
-
- BasicBlock *IDom;
- for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
- if (!Rung) return IP;
- Rung = Rung->getIDom();
- if (!Rung) return IP;
- IDom = Rung->getBlock();
-
- // Don't climb into a loop though.
- const Loop *IDomLoop = LI.getLoopFor(IDom);
- unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
- if (IDomDepth <= IPLoopDepth &&
- (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
- break;
- }
-
bool AllDominate = true;
Instruction *BetterPos = nullptr;
- Instruction *Tentative = IDom->getTerminator();
+ // Don't bother attempting to insert before a catchswitch, their basic block
+  // cannot have other non-PHI instructions.
+ if (isa<CatchSwitchInst>(Tentative))
+ return IP;
+
for (Instruction *Inst : Inputs) {
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
@@ -4360,7 +4328,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
- if (IDom == Inst->getParent() &&
+ if (Tentative->getParent() == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
@@ -4370,6 +4338,26 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
IP = BetterPos->getIterator();
else
IP = Tentative->getIterator();
+
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ Tentative = IDom->getTerminator();
}
return IP;
@@ -4426,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
- while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
+ while (IP->isEHPad()) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -4454,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+ Rewriter.setInsertPoint(&*IP);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@@ -4485,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}
// Expand the ScaledReg portion.
@@ -4503,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
- SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@@ -4520,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4536,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4546,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4582,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
- Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
@@ -4961,7 +4950,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
@@ -4991,16 +4980,16 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<IVUsers>();
- AU.addPreserved<IVUsers>();
+ AU.addRequired<IVUsersWrapperPass>();
+ AU.addPreserved<IVUsersWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
- auto &IU = getAnalysis<IVUsers>();
+ auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();