| field | value |
|---|---|
| author | 2017-01-24 08:32:59 +0000 |
| committer | 2017-01-24 08:32:59 +0000 |
| commit | 53d771aafdbe5b919f264f53cba3788e2c4cffd2 (patch) |
| tree | 7eca39498be0ff1e3a6daf583cd9ca5886bb2636 /gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp |
| parent | In preparation of compiling our kernels with -ffreestanding, explicitly map (diff) |
| download | wireguard-openbsd-53d771aafdbe5b919f264f53cba3788e2c4cffd2.tar.xz, wireguard-openbsd-53d771aafdbe5b919f264f53cba3788e2c4cffd2.zip |
Import LLVM 4.0.0 rc1 including clang and lld to help the current
development effort on OpenBSD/arm64.
Diffstat (limited to 'gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 68 |

1 file changed, 52 insertions(+), 16 deletions(-)
```diff
diff --git a/gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 60b27dd75a8..6d643457e9a 100644
--- a/gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/gnu/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -70,17 +70,28 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
 #include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "peephole-opt"
@@ -118,6 +129,7 @@ STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
 STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
 
 namespace {
+
   class ValueTrackerResult;
 
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -128,6 +140,7 @@ namespace {
 
   public:
     static char ID; // Pass identification
+
     PeepholeOptimizer() : MachineFunctionPass(ID) {
       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     }
@@ -390,10 +403,12 @@ namespace {
     /// register of the last source.
     unsigned getReg() const { return Reg; }
   };
-}
+
+} // end anonymous namespace
 
 char PeepholeOptimizer::ID = 0;
 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+
 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
                       "Peephole Optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -737,6 +752,7 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
 }
 
 namespace {
+
 /// \brief Helper class to rewrite the arguments of a copy-like instruction.
 class CopyRewriter {
 protected:
@@ -820,7 +836,6 @@ public:
                           TargetInstrInfo::RegSubRegPair Def,
                           PeepholeOptimizer::RewriteMapTy &RewriteMap,
                           bool HandleMultipleSources = true) {
-
     TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
     do {
       ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
@@ -859,7 +874,7 @@ public:
       const MachineOperand &MODef = NewPHI->getOperand(0);
       return TargetInstrInfo::RegSubRegPair(MODef.getReg(),
                                             MODef.getSubReg());
-    } while (1);
+    } while (true);
 
     return TargetInstrInfo::RegSubRegPair(0, 0);
   }
@@ -1001,6 +1016,7 @@ public:
       TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
     return true;
   }
+
   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
     if (CurrentSrcIdx != 2)
       return false;
@@ -1141,7 +1157,8 @@ public:
     return true;
   }
 };
-} // End namespace.
+
+} // end anonymous namespace
 
 /// \brief Get the appropriated CopyRewriter for \p MI.
 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
@@ -1523,11 +1540,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       if (MI->isDebugValue())
        continue;
 
-      // If we run into an instruction we can't fold across, discard
-      // the load candidates.
-      if (MI->isLoadFoldBarrier())
-        FoldAsLoadDefCandidates.clear();
-
       if (MI->isPosition() || MI->isPHI())
         continue;
 
@@ -1571,7 +1583,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
           DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
                        << *MI << '\n');
           NAPhysToVirtMIs.clear();
-          continue;
         }
 
         if ((isUncoalescableCopy(*MI) &&
@@ -1622,8 +1633,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       // earlier load into MI.
       if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
           !FoldAsLoadDefCandidates.empty()) {
+
+        // We visit each operand even after successfully folding a previous
+        // one. This allows us to fold multiple loads into a single
+        // instruction. We do assume that optimizeLoadInstr doesn't insert
+        // foldable uses earlier in the argument list. Since we don't restart
+        // iteration, we'd miss such cases.
         const MCInstrDesc &MIDesc = MI->getDesc();
-        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+        for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
             ++i) {
           const MachineOperand &MOp = MI->getOperand(i);
           if (!MOp.isReg())
@@ -1650,13 +1667,23 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
               MRI->markUsesInDebugValueAsUndef(FoldedReg);
               FoldAsLoadDefCandidates.erase(FoldedReg);
               ++NumLoadFold;
-              // MI is replaced with FoldMI.
+
+              // MI is replaced with FoldMI so we can continue trying to fold
               Changed = true;
-              break;
+              MI = FoldMI;
             }
           }
         }
       }
+
+      // If we run into an instruction we can't fold across, discard
+      // the load candidates. Note: We might be able to fold *into* this
+      // instruction, so this needs to be after the folding logic.
+      if (MI->isLoadFoldBarrier()) {
+        DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
+        FoldAsLoadDefCandidates.clear();
+      }
     }
   }
 
@@ -1688,7 +1715,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
   // Bitcasts with more than one def are not supported.
   if (Def->getDesc().getNumDefs() != 1)
     return ValueTrackerResult();
-  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+  const MachineOperand DefOp = Def->getOperand(DefIdx);
+  if (DefOp.getSubReg() != DefSubReg)
     // If we look for a different subreg, it means we want a subreg of the src.
     // Bails as we do not support composing subregs yet.
     return ValueTrackerResult();
@@ -1708,6 +1736,14 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
       return ValueTrackerResult();
     SrcIdx = OpIdx;
   }
+
+  // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a
+  // COPY will break the assumed guarantees for the upper bits.
+  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
+    if (UseMI.isSubregToReg())
+      return ValueTrackerResult();
+  }
+
   const MachineOperand &Src = Def->getOperand(SrcIdx);
   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
 }
@@ -1806,8 +1842,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
   // sub-register we are tracking.
   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
   if (!TRI ||
-      (TRI->getSubRegIndexLaneMask(DefSubReg) &
-       TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
+      !(TRI->getSubRegIndexLaneMask(DefSubReg) &
+        TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
    return ValueTrackerResult();
   // At this point, the value is available in v0 via the same subreg
   // we used for Def.
```
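The most consequential change above is the reordering in runOnMachineFunction(): the isLoadFoldBarrier() check now runs after the attempt to fold pending load candidates into the current instruction, and a successful fold keeps scanning operands (`MI = FoldMI`) instead of breaking out, so several loads can be folded into a single instruction. Below is a minimal standalone sketch of that control flow only; `Instr` and `scanBlock` are hypothetical stand-ins for MachineInstr and the TII->optimizeLoadInstr() machinery, not LLVM API:

```cpp
#include <cstdio>
#include <set>
#include <vector>

// Hypothetical stand-in for a MachineInstr: either a foldable load
// (defining FoldableLoadReg) or a consumer of some registers, which may
// also be a load-fold barrier (e.g. a call or a volatile access).
struct Instr {
  int FoldableLoadReg = -1;
  std::set<int> Uses;
  bool IsBarrier = false;
};

// One scan over a block, mirroring the ordering the hunks establish:
// attempt the fold first, discard the candidates afterwards.
static int scanBlock(std::vector<Instr> &Block) {
  std::set<int> Candidates; // defs of loads we may still fold
  int NumFolded = 0;
  for (Instr &MI : Block) {
    if (MI.FoldableLoadReg >= 0) {
      Candidates.insert(MI.FoldableLoadReg); // cf. isLoadFoldable(...)
    } else {
      // Keep visiting operands after a successful fold, so several loads
      // can land in one instruction (the `MI = FoldMI` change removes the
      // old `break`).
      for (int Reg : MI.Uses)
        if (Candidates.erase(Reg))
          ++NumFolded; // stand-in for TII->optimizeLoadInstr(...)
    }
    // The barrier check comes AFTER the fold attempt: we may still fold
    // *into* a barrier, we just cannot carry candidates past it.
    if (MI.IsBarrier)
      Candidates.clear();
  }
  return NumFolded;
}

int main() {
  std::vector<Instr> Block(3);
  Block[0].FoldableLoadReg = 1;                   // %1 = load ...
  Block[1].Uses = {1}; Block[1].IsBarrier = true; // barrier that reads %1
  Block[2].Uses = {1};                            // too late: cleared above
  std::printf("folded %d load(s)\n", scanBlock(Block)); // folded 1 load(s)
  return 0;
}
```

With the pre-4.0 ordering (clear before fold), the load feeding the barrier instruction itself would have been discarded before it could be folded; hoisting the barrier check back above the fold loop makes the sketch print "folded 0 load(s)". In the same spirit of checking before rewriting, getNextSourceFromBitcast() now bails out when any user of the bitcast's def is a SUBREG_TO_REG: that instruction carries a promise that the bits above the inserted sub-register are zero, and replacing the bitcast with a plain COPY would silently drop that guarantee.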

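The final hunk tracks an upstream API change rather than a behavioral fix: sub-register lane masks are no longer raw integers but a class type, so the overlap test `(A & B) != 0` has to be spelled `!(A & B).none()`. The toy LaneMask below is a simplified stand-in for llvm::LaneBitmask (not the real class) that shows why the negated none() form expresses the same "lane sets intersect" condition once implicit integer conversion is gone:

```cpp
#include <cassert>
#include <cstdint>

// Simplified stand-in for llvm::LaneBitmask: a wrapper with no implicit
// conversion to integer, so the old `(A & B) != 0` no longer type-checks
// and emptiness must be queried through none().
struct LaneMask {
  uint32_t Bits = 0;

  bool none() const { return Bits == 0; } // true when no lane is set
  LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
};

int main() {
  LaneMask DefLanes{0b0011};      // lanes reached via DefSubReg
  LaneMask InsertedLanes{0b0110}; // lanes written via InsertedReg.SubIdx

  // Equivalent of the rewritten bail-out condition: the two lane sets
  // overlap exactly when their intersection is non-empty.
  bool Overlap = !(DefLanes & InsertedLanes).none();
  assert(Overlap && "lane 0b0010 is shared, so the tracker must bail");
  return 0;
}
```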