diff options
Diffstat (limited to 'gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp')
| -rw-r--r-- | gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 309 |
1 files changed, 212 insertions, 97 deletions
diff --git a/gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 0aada8a53c9..6f1f6c46a10 100644 --- a/gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/gnu/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -53,14 +53,6 @@ static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near", static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true)); -static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon Double Vector eXtensions")); - -static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon Vector eXtensions")); - static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false)); @@ -87,68 +79,233 @@ static cl::opt<bool> EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Consider calls to be predicable")); -void HexagonSubtarget::initializeEnvironment() { - UseMemOps = false; - ModeIEEERndNear = false; - UseBSBScheduling = false; +static cl::opt<bool> SchedPredsCloser("sched-preds-closer", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict", + cl::Hidden, cl::ZeroOrMore, cl::init(true), + cl::desc("Enable checking for cache bank conflicts")); + + +HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, const TargetMachine &TM) + : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()), + CPUString(Hexagon_MC::selectHexagonCPU(CPU)), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), + RegInfo(getHwMode()), TLInfo(TM, *this), + InstrItins(getInstrItineraryForCPU(CPUString)) { + // Beware of the default constructor of InstrItineraryData: it will + // reset all members to 0. + assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized"); } HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - CPUString = Hexagon_MC::selectHexagonCPU(getTargetTriple(), CPU); - - static std::map<StringRef, HexagonArchEnum> CpuTable { - { "hexagonv4", V4 }, - { "hexagonv5", V5 }, - { "hexagonv55", V55 }, - { "hexagonv60", V60 }, - { "hexagonv62", V62 }, + static std::map<StringRef, Hexagon::ArchEnum> CpuTable{ + {"hexagonv4", Hexagon::ArchEnum::V4}, + {"hexagonv5", Hexagon::ArchEnum::V5}, + {"hexagonv55", Hexagon::ArchEnum::V55}, + {"hexagonv60", Hexagon::ArchEnum::V60}, + {"hexagonv62", Hexagon::ArchEnum::V62}, + {"hexagonv65", Hexagon::ArchEnum::V65}, }; - auto foundIt = CpuTable.find(CPUString); - if (foundIt != CpuTable.end()) - HexagonArchVersion = foundIt->second; + auto FoundIt = CpuTable.find(CPUString); + if (FoundIt != CpuTable.end()) + HexagonArchVersion = FoundIt->second; else llvm_unreachable("Unrecognized Hexagon processor version"); - UseHVXOps = false; - UseHVXDblOps = false; + UseHVX128BOps = false; + UseHVX64BOps = false; UseLongCalls = false; + + UseMemOps = DisableMemOps ? false : EnableMemOps; + ModeIEEERndNear = EnableIEEERndNear; + UseBSBScheduling = hasV60TOps() && EnableBSBSched; + ParseSubtargetFeatures(CPUString, FS); - if (EnableHexagonHVX.getPosition()) - UseHVXOps = EnableHexagonHVX; - if (EnableHexagonHVXDouble.getPosition()) - UseHVXDblOps = EnableHexagonHVXDouble; if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; + FeatureBitset Features = getFeatureBits(); + if (HexagonDisableDuplex) + setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); + return *this; } -HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, - StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { - initializeEnvironment(); - - // Initialize scheduling itinerary for the specified CPU. - InstrItins = getInstrItineraryForCPU(CPUString); - - // UseMemOps on by default unless disabled explicitly - if (DisableMemOps) - UseMemOps = false; - else if (EnableMemOps) - UseMemOps = true; - else - UseMemOps = false; +void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &SU : DAG->SUnits) { + if (!SU.isInstr()) + continue; + SmallVector<SDep, 4> Erase; + for (auto &D : SU.Preds) + if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) + Erase.push_back(D); + for (auto &E : Erase) + SU.removePred(E); + } +} - if (EnableIEEERndNear) - ModeIEEERndNear = true; - else - ModeIEEERndNear = false; +void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &SU : DAG->SUnits) { + // Update the latency of chain edges between v60 vector load or store + // instructions to be 1. These instruction cannot be scheduled in the + // same packet. + MachineInstr &MI1 = *SU.getInstr(); + auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII); + bool IsStoreMI1 = MI1.mayStore(); + bool IsLoadMI1 = MI1.mayLoad(); + if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1)) + continue; + for (SDep &SI : SU.Succs) { + if (SI.getKind() != SDep::Order || SI.getLatency() != 0) + continue; + MachineInstr &MI2 = *SI.getSUnit()->getInstr(); + if (!QII->isHVXVec(MI2)) + continue; + if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { + SI.setLatency(1); + SU.setHeightDirty(); + // Change the dependence in the opposite direction too. + for (SDep &PI : SI.getSUnit()->Preds) { + if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) + continue; + PI.setLatency(1); + SI.getSUnit()->setDepthDirty(); + } + } + } + } +} - UseBSBScheduling = hasV60TOps() && EnableBSBSched; +// Check if a call and subsequent A2_tfrpi instructions should maintain +// scheduling affinity. We are looking for the TFRI to be consumed in +// the next instruction. This should help reduce the instances of +// double register pairs being allocated and scheduled before a call +// when not used until after the call. This situation is exacerbated +// by the fact that we allocate the pair from the callee saves list, +// leading to excess spills and restores. +bool HexagonSubtarget::CallMutation::shouldTFRICallBind( + const HexagonInstrInfo &HII, const SUnit &Inst1, + const SUnit &Inst2) const { + if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) + return false; + + // TypeXTYPE are 64 bit operations. + unsigned Type = HII.getType(*Inst2.getInstr()); + return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op || + Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM; +} + +void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { + SUnit* LastSequentialCall = nullptr; + unsigned VRegHoldingRet = 0; + unsigned RetRegister; + SUnit* LastUseOfRet = nullptr; + auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); + auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + + // Currently we only catch the situation when compare gets scheduled + // before preceding call. + for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { + // Remember the call. + if (DAG->SUnits[su].getInstr()->isCall()) + LastSequentialCall = &DAG->SUnits[su]; + // Look for a compare that defines a predicate. + else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) + DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + // Look for call and tfri* instructions. + else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && + shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) + DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies between two calls, which are caused by + // both the return value and the argument for the next call being in %r0. + // Example: + // 1: <call1> + // 2: %vreg = COPY %r0 + // 3: <use of %vreg> + // 4: %r0 = ... + // 5: <call2> + // The scheduler would often swap 3 and 4, so an additional register is + // needed. This code inserts a Barrier dependence between 3 & 4 to prevent + // this. The same applies for %d0 and %v0/%w0, which are also handled. + else if (SchedRetvalOptimization) { + const MachineInstr *MI = DAG->SUnits[su].getInstr(); + if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || + MI->readsRegister(Hexagon::V0, &TRI))) { + // %vreg = COPY %r0 + VRegHoldingRet = MI->getOperand(0).getReg(); + RetRegister = MI->getOperand(1).getReg(); + LastUseOfRet = nullptr; + } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) + // <use of %X> + LastUseOfRet = &DAG->SUnits[su]; + else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) + // %r0 = ... + DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + } + } +} + +void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) { + if (!EnableCheckBankConflict) + return; + + const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII); + + // Create artificial edges between loads that could likely cause a bank + // conflict. Since such loads would normally not have any dependency + // between them, we cannot rely on existing edges. + for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) { + SUnit &S0 = DAG->SUnits[i]; + MachineInstr &L0 = *S0.getInstr(); + if (!L0.mayLoad() || L0.mayStore() || + HII.getAddrMode(L0) != HexagonII::BaseImmOffset) + continue; + int Offset0; + unsigned Size0; + unsigned Base0 = HII.getBaseAndOffset(L0, Offset0, Size0); + // Is the access size is longer than the L1 cache line, skip the check. + if (Base0 == 0 || Size0 >= 32) + continue; + // Scan only up to 32 instructions ahead (to avoid n^2 complexity). + for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) { + SUnit &S1 = DAG->SUnits[j]; + MachineInstr &L1 = *S1.getInstr(); + if (!L1.mayLoad() || L1.mayStore() || + HII.getAddrMode(L1) != HexagonII::BaseImmOffset) + continue; + int Offset1; + unsigned Size1; + unsigned Base1 = HII.getBaseAndOffset(L1, Offset1, Size1); + if (Base1 == 0 || Size1 >= 32 || Base0 != Base1) + continue; + // Check bits 3 and 4 of the offset: if they differ, a bank conflict + // is unlikely. + if (((Offset0 ^ Offset1) & 0x18) != 0) + continue; + // Bits 3 and 4 are the same, add an artificial edge and set extra + // latency. + SDep A(&S0, SDep::Artificial); + A.setLatency(1); + S1.addPred(A, true); + } + } +} + +/// \brief Enable use of alias analysis during code generation (during MI +/// scheduling, DAGCombine, etc.). +bool HexagonSubtarget::useAA() const { + if (OptLevel != CodeGenOpt::None) + return true; + return false; } /// \brief Perform target specific adjustments to the latency of a schedule @@ -204,59 +361,17 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, updateLatency(*SrcInst, *DstInst, Dep); } -void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { - for (auto &SU : DAG->SUnits) { - if (!SU.isInstr()) - continue; - SmallVector<SDep, 4> Erase; - for (auto &D : SU.Preds) - if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) - Erase.push_back(D); - for (auto &E : Erase) - SU.removePred(E); - } - - for (auto &SU : DAG->SUnits) { - // Update the latency of chain edges between v60 vector load or store - // instructions to be 1. These instruction cannot be scheduled in the - // same packet. - MachineInstr &MI1 = *SU.getInstr(); - auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII); - bool IsStoreMI1 = MI1.mayStore(); - bool IsLoadMI1 = MI1.mayLoad(); - if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1)) - continue; - for (auto &SI : SU.Succs) { - if (SI.getKind() != SDep::Order || SI.getLatency() != 0) - continue; - MachineInstr &MI2 = *SI.getSUnit()->getInstr(); - if (!QII->isHVXVec(MI2)) - continue; - if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { - SI.setLatency(1); - SU.setHeightDirty(); - // Change the dependence in the opposite direction too. - for (auto &PI : SI.getSUnit()->Preds) { - if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) - continue; - PI.setLatency(1); - SI.getSUnit()->setDepthDirty(); - } - } - } - } -} - void HexagonSubtarget::getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { - Mutations.push_back( - llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>()); + Mutations.push_back(llvm::make_unique<UsrOverflowMutation>()); + Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>()); + Mutations.push_back(llvm::make_unique<BankConflictMutation>()); } void HexagonSubtarget::getSMSMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { - Mutations.push_back( - llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>()); + Mutations.push_back(llvm::make_unique<UsrOverflowMutation>()); + Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>()); } // Pin the vtable to this file. |
