| author | 2017-01-14 19:55:43 +0000 |
|---|---|
| committer | 2017-01-14 19:55:43 +0000 |
| commit | bd3306aecb3a15e8967143b8cdbbccf2b1b19b74 (patch) |
| tree | 309a8132b44564b9e634c0da6815187ce8eab27c /gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp |
| parent | killp -a should not kill the window if only one pane. (diff) |
Import LLVM 3.9.1 including clang and lld.
Diffstat (limited to 'gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
| -rw-r--r-- | gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 115 |
1 file changed, 52 insertions, 63 deletions
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index acfdec43d21..70bd9d3cca9 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -684,10 +684,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
     switch (II->getIntrinsicID()) {
       default: break;
       case Intrinsic::prefetch:
-      case Intrinsic::x86_sse_storeu_ps:
-      case Intrinsic::x86_sse2_storeu_pd:
-      case Intrinsic::x86_sse2_storeu_dq:
-      case Intrinsic::x86_sse2_storel_dq:
         if (II->getArgOperand(0) == OperandVal)
           isAddress = true;
         break;
@@ -704,18 +700,6 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
     AccessTy.AddrSpace = SI->getPointerAddressSpace();
   } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
     AccessTy.AddrSpace = LI->getPointerAddressSpace();
-  } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-    // Addressing modes can also be folded into prefetches and a variety
-    // of intrinsics.
-    switch (II->getIntrinsicID()) {
-    default: break;
-    case Intrinsic::x86_sse_storeu_ps:
-    case Intrinsic::x86_sse2_storeu_pd:
-    case Intrinsic::x86_sse2_storeu_dq:
-    case Intrinsic::x86_sse2_storel_dq:
-      AccessTy.MemTy = II->getArgOperand(0)->getType();
-      break;
-    }
   }
 
   // All pointers have the same requirements, so canonicalize them to an
@@ -963,8 +947,8 @@ void Cost::RateRegister(const SCEV *Reg,
         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
     ++SetupCost;
 
-    NumIVMuls += isa<SCEVMulExpr>(Reg) &&
-      SE.hasComputableLoopEvolution(Reg, L);
+  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+               SE.hasComputableLoopEvolution(Reg, L);
 }
 
 /// Record this register in the set. If we haven't seen it before, rate
@@ -2752,34 +2736,31 @@ void LSRInstance::CollectChains() {
   LatchPath.push_back(LoopHeader);
 
   // Walk the instruction stream from the loop header to the loop latch.
-  for (SmallVectorImpl<BasicBlock *>::reverse_iterator
-         BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
-       BBIter != BBEnd; ++BBIter) {
-    for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
-         I != E; ++I) {
+  for (BasicBlock *BB : reverse(LatchPath)) {
+    for (Instruction &I : *BB) {
       // Skip instructions that weren't seen by IVUsers analysis.
-      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
+      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
         continue;
 
       // Ignore users that are part of a SCEV expression. This way we only
       // consider leaf IV Users. This effectively rediscovers a portion of
       // IVUsers analysis but in program order this time.
-      if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
+      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
         continue;
 
       // Remove this instruction from any NearUsers set it may be in.
       for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
            ChainIdx < NChains; ++ChainIdx) {
-        ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
+        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
       }
       // Search for operands that can be chained.
       SmallPtrSet<Instruction*, 4> UniqueOperands;
-      User::op_iterator IVOpEnd = I->op_end();
-      User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+      User::op_iterator IVOpEnd = I.op_end();
+      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
       while (IVOpIter != IVOpEnd) {
         Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
         if (UniqueOperands.insert(IVOpInst).second)
-          ChainInstruction(&*I, IVOpInst, ChainUsersVec);
+          ChainInstruction(&I, IVOpInst, ChainUsersVec);
         IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
       }
     } // Continue walking down the instructions.
@@ -4331,28 +4312,15 @@ BasicBlock::iterator
 LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                  const SmallVectorImpl<Instruction *> &Inputs)
                                                                          const {
+  Instruction *Tentative = &*IP;
   for (;;) {
-    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
-    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
-
-    BasicBlock *IDom;
-    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
-      if (!Rung) return IP;
-      Rung = Rung->getIDom();
-      if (!Rung) return IP;
-      IDom = Rung->getBlock();
-
-      // Don't climb into a loop though.
-      const Loop *IDomLoop = LI.getLoopFor(IDom);
-      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
-      if (IDomDepth <= IPLoopDepth &&
-          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
-        break;
-    }
-
     bool AllDominate = true;
     Instruction *BetterPos = nullptr;
-    Instruction *Tentative = IDom->getTerminator();
+    // Don't bother attempting to insert before a catchswitch, their basic block
+    // cannot have other non-PHI instructions.
+    if (isa<CatchSwitchInst>(Tentative))
+      return IP;
+
     for (Instruction *Inst : Inputs) {
       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
         AllDominate = false;
@@ -4360,7 +4328,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
       }
       // Attempt to find an insert position in the middle of the block,
       // instead of at the end, so that it can be used for other expansions.
-      if (IDom == Inst->getParent() &&
+      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
         BetterPos = &*std::next(BasicBlock::iterator(Inst));
     }
@@ -4370,6 +4338,26 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
       IP = BetterPos->getIterator();
     else
       IP = Tentative->getIterator();
+
+    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+    BasicBlock *IDom;
+    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+      if (!Rung) return IP;
+      Rung = Rung->getIDom();
+      if (!Rung) return IP;
+      IDom = Rung->getBlock();
+
+      // Don't climb into a loop though.
+      const Loop *IDomLoop = LI.getLoopFor(IDom);
+      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+      if (IDomDepth <= IPLoopDepth &&
+          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+        break;
+    }
+
+    Tentative = IDom->getTerminator();
   }
 
   return IP;
@@ -4426,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
   while (isa<PHINode>(IP)) ++IP;
 
   // Ignore landingpad instructions.
-  while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
+  while (IP->isEHPad()) ++IP;
 
   // Ignore debug intrinsics.
   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -4454,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Determine an input position which will be dominated by the operands and
   // which will dominate the result.
   IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+  Rewriter.setInsertPoint(&*IP);
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
@@ -4485,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                                  LF.UserInst, LF.OperandValToReplace,
                                  Loops, SE, DT);
 
-    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
   }
 
   // Expand the ScaledReg portion.
@@ -4503,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Expand ScaleReg as if it was part of the base regs.
       if (F.Scale == 1)
         Ops.push_back(
-            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
+            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
       else {
         // An interesting way of "folding" with an icmp is to use a negated
         // scale, which we'll implement by inserting it into the other operand
        // of the icmp.
         assert(F.Scale == -1 &&
                "The only scale supported by ICmpZero uses is -1!");
-        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
+        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
       }
     } else {
       // Otherwise just expand the scaled register and an explicit scale,
@@ -4520,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Unless the addressing mode will not be folded.
       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
           isAMCompletelyFolded(TTI, LU, F)) {
-        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
         Ops.clear();
         Ops.push_back(SE.getUnknown(FullV));
       }
-      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
       if (F.Scale != 1)
         ScaledS = SE.getMulExpr(ScaledS,
                                 SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4536,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (F.BaseGV) {
     // Flush the operand list to suppress SCEVExpander hoisting.
     if (!Ops.empty()) {
-      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
       Ops.clear();
       Ops.push_back(SE.getUnknown(FullV));
     }
@@ -4546,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
   // unfolded offsets. LSR assumes they both live next to their uses.
   if (!Ops.empty()) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
     Ops.clear();
     Ops.push_back(SE.getUnknown(FullV));
   }
@@ -4582,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   const SCEV *FullS = Ops.empty() ?
                       SE.getConstant(IntTy, 0) :
                       SE.getAddExpr(Ops);
-  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
+  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
 
   // We're done expanding now, so reset the rewriter.
   Rewriter.clearPostInc();
@@ -4961,7 +4950,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
@@ -4991,16 +4980,16 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   // Requiring LoopSimplify a second time here prevents IVUsers from running
   // twice, since LoopSimplify was invalidated by running ScalarEvolution.
   AU.addRequiredID(LoopSimplifyID);
-  AU.addRequired<IVUsers>();
-  AU.addPreserved<IVUsers>();
+  AU.addRequired<IVUsersWrapperPass>();
+  AU.addPreserved<IVUsersWrapperPass>();
   AU.addRequired<TargetTransformInfoWrapperPass>();
 }
 
 bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
-  if (skipOptnoneFunction(L))
+  if (skipLoop(L))
    return false;
 
-  auto &IU = getAnalysis<IVUsers>();
+  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
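The recurring change in the Expand() hunks above is that SCEVExpander no longer receives an explicit insert-point argument on every expandCodeFor() call; instead the insert point is set once via setInsertPoint() right after AdjustInsertPositionForExpand(). A minimal sketch of the two calling styles, using only the calls visible in this diff; the helper name expandAtPoint and the surrounding setup (ScalarEvolution, loop, expander construction) are assumptions for illustration, not part of the patch:

    #include "llvm/Analysis/ScalarEvolutionExpander.h"
    #include "llvm/IR/BasicBlock.h"

    using namespace llvm;

    // Hypothetical helper showing the API usage change in this import:
    // the expansion point is configured once on the expander rather than
    // passed to every expandCodeFor() call.
    static Value *expandAtPoint(SCEVExpander &Rewriter, const SCEV *Expr,
                                Type *Ty, BasicBlock::iterator IP) {
      // Pre-3.9 style (removed in this diff): pass the insert point each time.
      //   Value *V = Rewriter.expandCodeFor(Expr, Ty, &*IP);

      // 3.9 style (added in this diff): set the insert point once, then expand.
      Rewriter.setInsertPoint(&*IP);
      return Rewriter.expandCodeFor(Expr, Ty);
    }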
