summary | refs | log | tree | commit | diff | stats
path: root/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp')
-rw-r--r-- gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp 65
1 files changed, 61 insertions, 4 deletions
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 4d2f917278e..2dc6f2702b3 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -10,7 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -87,6 +87,30 @@ static unsigned isCopyToExec(const MachineInstr &MI) {
return AMDGPU::NoRegister;
}
+/// If \p MI is one of the 64-bit scalar logical ops listed below and reads exec
+/// as operand 1 or 2, return its destination register (operand 0).
+static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ case AMDGPU::S_XOR_B64:
+ case AMDGPU::S_ANDN2_B64:
+ case AMDGPU::S_ORN2_B64:
+ case AMDGPU::S_NAND_B64:
+ case AMDGPU::S_NOR_B64:
+ case AMDGPU::S_XNOR_B64: {
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(0).getReg();
+ const MachineOperand &Src2 = MI.getOperand(2);
+ if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(0).getReg();
+ }
+ }
+
+ return AMDGPU::NoRegister; // Unlisted opcode, or neither source is exec.
+}
+
static unsigned getSaveExecOp(unsigned Opc) {
switch (Opc) {
case AMDGPU::S_AND_B64:
@@ -181,6 +205,9 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
}
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -209,8 +236,24 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
// Scan backwards to find the def.
auto CopyToExecInst = &*I;
auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
- if (CopyFromExecInst == E)
+ if (CopyFromExecInst == E) {
+ auto PrepareExecInst = std::next(I);
+ if (PrepareExecInst == E)
+ continue;
+ // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
+ if (CopyToExecInst->getOperand(1).isKill() &&
+ isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
+ DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
+
+ PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+
+ DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
+
+ CopyToExecInst->eraseFromParent();
+ }
+
continue;
+ }
if (isLiveOut(MBB, CopyToExec)) {
// The copied register is live out and has a second use in another block.
@@ -233,10 +276,12 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
break;
}
+ bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);
+
if (J->modifiesRegister(CopyToExec, TRI)) {
if (SaveExecInst) {
DEBUG(dbgs() << "Multiple instructions modify "
- << PrintReg(CopyToExec, TRI) << '\n');
+ << printReg(CopyToExec, TRI) << '\n');
SaveExecInst = nullptr;
break;
}
@@ -245,7 +290,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
break;
- if (J->readsRegister(CopyFromExec, TRI)) {
+ if (ReadsCopyFromExec) {
SaveExecInst = &*J;
DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
continue;
@@ -253,6 +298,18 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
break;
}
+ } else if (ReadsCopyFromExec && !SaveExecInst) {
+ // Make sure no other instruction is trying to use this copy, before it
+ // will be rewritten by the saveexec, i.e. hasOneUse. There may have
+ // been another use, such as an inserted spill. For example:
+ //
+ // %sgpr0_sgpr1 = COPY %exec
+ // spill %sgpr0_sgpr1
+ // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
+ //
+ DEBUG(dbgs() << "Found second use of save inst candidate: "
+ << *J << '\n');
+ break;
}
if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {