diff options
Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp')
| -rw-r--r-- | gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 65 |
1 files changed, 61 insertions, 4 deletions
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 4d2f917278e..2dc6f2702b3 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -10,7 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -87,6 +87,30 @@ static unsigned isCopyToExec(const MachineInstr &MI) { return AMDGPU::NoRegister; } +/// If \p MI is a logical operation on an exec value, +/// return its destination register (operand 0). +static unsigned isLogicalOpOnExec(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::S_AND_B64: + case AMDGPU::S_OR_B64: + case AMDGPU::S_XOR_B64: + case AMDGPU::S_ANDN2_B64: + case AMDGPU::S_ORN2_B64: + case AMDGPU::S_NAND_B64: + case AMDGPU::S_NOR_B64: + case AMDGPU::S_XNOR_B64: { + const MachineOperand &Src1 = MI.getOperand(1); + if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC) + return MI.getOperand(0).getReg(); + const MachineOperand &Src2 = MI.getOperand(2); + if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC) + return MI.getOperand(0).getReg(); + } + } + + return AMDGPU::NoRegister; +} + static unsigned getSaveExecOp(unsigned Opc) { switch (Opc) { case AMDGPU::S_AND_B64: @@ -181,6 +205,9 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { } bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -209,8 +236,24 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { // Scan backwards to find the def. 
auto CopyToExecInst = &*I; auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec); - if (CopyFromExecInst == E) + if (CopyFromExecInst == E) { + auto PrepareExecInst = std::next(I); + if (PrepareExecInst == E) + continue; + // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec + if (CopyToExecInst->getOperand(1).isKill() && + isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) { + DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); + + PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC); + + DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n'); + + CopyToExecInst->eraseFromParent(); + } + continue; + } if (isLiveOut(MBB, CopyToExec)) { // The copied register is live out and has a second use in another block. @@ -233,10 +276,12 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { break; } + bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI); + if (J->modifiesRegister(CopyToExec, TRI)) { if (SaveExecInst) { DEBUG(dbgs() << "Multiple instructions modify " - << PrintReg(CopyToExec, TRI) << '\n'); + << printReg(CopyToExec, TRI) << '\n'); SaveExecInst = nullptr; break; } @@ -245,7 +290,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END) break; - if (J->readsRegister(CopyFromExec, TRI)) { + if (ReadsCopyFromExec) { SaveExecInst = &*J; DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n'); continue; @@ -253,6 +298,18 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n'); break; } + } else if (ReadsCopyFromExec && !SaveExecInst) { + // Make sure no other instruction is trying to use this copy, before it + // will be rewritten by the saveexec, i.e. hasOneUse. There may have + // been another use, such as an inserted spill. 
For example: + // + // %sgpr0_sgpr1 = COPY %exec + // spill %sgpr0_sgpr1 + // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1 + // + DEBUG(dbgs() << "Found second use of save inst candidate: " + << *J << '\n'); + break; } if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) { |
