summaryrefslogtreecommitdiffstats
path: root/gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r--gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp309
1 files changed, 240 insertions, 69 deletions
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a613a220e29..53aefe82973 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -9,7 +9,7 @@
//
/// \file
/// \brief This pass adds instructions to enable whole quad mode for pixel
-/// shaders.
+/// shaders, and whole wavefront mode for all programs.
///
/// Whole quad mode is required for derivative computations, but it interferes
/// with shader side effects (stores and atomics). This pass is run on the
@@ -29,6 +29,13 @@
/// ...
/// S_MOV_B64 EXEC, Tmp
///
+/// We also compute when a sequence of instructions requires Whole Wavefront
+/// Mode (WWM) and insert instructions to save and restore it:
+///
+/// S_OR_SAVEEXEC_B64 Tmp, -1
+/// ...
+/// S_MOV_B64 EXEC, Tmp
+///
/// In order to avoid excessive switching during sequences of Exact
/// instructions, the pass first analyzes which instructions must be run in WQM
/// (aka which instructions produce values that lead to derivative
@@ -54,10 +61,11 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,13 +74,13 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <vector>
@@ -84,7 +92,8 @@ namespace {
enum {
StateWQM = 0x1,
- StateExact = 0x2,
+ StateWWM = 0x2,
+ StateExact = 0x4,
};
struct PrintState {
@@ -94,20 +103,28 @@ public:
explicit PrintState(int State) : State(State) {}
};
+#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {
if (PS.State & StateWQM)
OS << "WQM";
- if (PS.State & StateExact) {
+ if (PS.State & StateWWM) {
if (PS.State & StateWQM)
OS << '|';
+ OS << "WWM";
+ }
+ if (PS.State & StateExact) {
+ if (PS.State & (StateWQM | StateWWM))
+ OS << '|';
OS << "Exact";
}
return OS;
}
+#endif
struct InstrInfo {
char Needs = 0;
+ char Disabled = 0;
char OutNeeds = 0;
};
@@ -128,6 +145,7 @@ struct WorkItem {
class SIWholeQuadMode : public MachineFunctionPass {
private:
+ CallingConv::ID CallingConv;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
@@ -136,12 +154,14 @@ private:
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
void printInfo();
void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
- void markUsesWQM(const MachineInstr &MI, std::vector<WorkItem> &Worklist);
+ void markInstructionUses(const MachineInstr &MI, char Flag,
+ std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
@@ -159,9 +179,14 @@ private:
unsigned SaveWQM, unsigned LiveMaskReg);
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SavedWQM);
+ void toWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
+ unsigned SaveOrig);
+ void fromWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
+ unsigned SavedOrig);
void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);
void lowerLiveMaskQueries(unsigned LiveMaskReg);
+ void lowerCopyInstrs();
public:
static char ID;
@@ -196,9 +221,11 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
}
-void SIWholeQuadMode::printInfo() {
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() {
for (const auto &BII : Blocks) {
- dbgs() << "\nBB#" << BII.first->getNumber() << ":\n"
+ dbgs() << "\n"
+ << printMBBReference(*BII.first) << ":\n"
<< " InNeeds = " << PrintState(BII.second.InNeeds)
<< ", Needs = " << PrintState(BII.second.Needs)
<< ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n";
@@ -213,27 +240,32 @@ void SIWholeQuadMode::printInfo() {
}
}
}
+#endif
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];
- assert(Flag == StateWQM || Flag == StateExact);
+ assert(!(Flag & StateExact) && Flag != 0);
- // Ignore if the instruction is already marked. The typical case is that we
- // mark an instruction WQM multiple times, but for atomics it can happen that
- // Flag is StateWQM, but Needs is already set to StateExact. In this case,
- // letting the atomic run in StateExact is correct as per the relevant specs.
- if (II.Needs)
+ // Remove any disabled states from the flag. The user that required it gets
+ // an undefined value in the helper lanes. For example, this can happen if
+ // the result of an atomic is used by instruction that requires WQM, where
+ // ignoring the request for WQM is correct as per the relevant specs.
+ Flag &= ~II.Disabled;
+
+ // Ignore if the flag is already encompassed by the existing needs, or we
+ // just disabled everything.
+ if ((II.Needs & Flag) == Flag)
return;
- II.Needs = Flag;
+ II.Needs |= Flag;
Worklist.push_back(&MI);
}
-/// Mark all instructions defining the uses in \p MI as WQM.
-void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,
- std::vector<WorkItem> &Worklist) {
+/// Mark all instructions defining the uses in \p MI with \p Flag.
+void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
+ std::vector<WorkItem> &Worklist) {
for (const MachineOperand &Use : MI.uses()) {
if (!Use.isReg() || !Use.isUse())
continue;
@@ -258,7 +290,7 @@ void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,
if (Value->isPHIDef())
continue;
- markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
+ markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
Worklist);
}
@@ -266,7 +298,7 @@ void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,
}
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
- markInstruction(DefMI, StateWQM, Worklist);
+ markInstruction(DefMI, Flag, Worklist);
}
}
@@ -275,27 +307,72 @@ void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;
- bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
-
- for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
- MachineBasicBlock &MBB = *BI;
+ bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
+ SmallVector<MachineInstr *, 4> SetInactiveInstrs;
+
+ // We need to visit the basic blocks in reverse post-order so that we visit
+ // defs before uses, in particular so that we don't accidentally mark an
+ // instruction as needing e.g. WQM before visiting it and realizing it needs
+ // WQM disabled.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
+ MachineBasicBlock &MBB = **BI;
+ BlockInfo &BBI = Blocks[&MBB];
for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
MachineInstr &MI = *II;
+ InstrInfo &III = Instructions[&MI];
unsigned Opcode = MI.getOpcode();
char Flags = 0;
- if (TII->isDS(Opcode)) {
+ if (TII->isDS(Opcode) && CallingConv == CallingConv::AMDGPU_PS) {
Flags = StateWQM;
} else if (TII->isWQM(Opcode)) {
// Sampling instructions don't need to produce results for all pixels
// in a quad, they just require all inputs of a quad to have been
// computed for derivatives.
- markUsesWQM(MI, Worklist);
+ markInstructionUses(MI, StateWQM, Worklist);
GlobalFlags |= StateWQM;
continue;
+ } else if (Opcode == AMDGPU::WQM) {
+ // The WQM intrinsic requires its output to have all the helper lanes
+ // correct, so we need it to be in WQM.
+ Flags = StateWQM;
+ LowerToCopyInstrs.push_back(&MI);
+ } else if (Opcode == AMDGPU::WWM) {
+ // The WWM intrinsic doesn't make the same guarantee, and plus it needs
+ // to be executed in WQM or Exact so that its copy doesn't clobber
+ // inactive lanes.
+ markInstructionUses(MI, StateWWM, Worklist);
+ GlobalFlags |= StateWWM;
+ LowerToCopyInstrs.push_back(&MI);
+ continue;
+ } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
+ Opcode == AMDGPU::V_SET_INACTIVE_B64) {
+ III.Disabled = StateWWM;
+ MachineOperand &Inactive = MI.getOperand(2);
+ if (Inactive.isReg()) {
+ if (Inactive.isUndef()) {
+ LowerToCopyInstrs.push_back(&MI);
+ } else {
+ unsigned Reg = Inactive.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg))
+ markInstruction(DefMI, StateWWM, Worklist);
+ }
+ }
+ }
+ SetInactiveInstrs.push_back(&MI);
+ continue;
} else if (TII->isDisableWQM(MI)) {
- Flags = StateExact;
+ BBI.Needs |= StateExact;
+ if (!(BBI.InNeeds & StateExact)) {
+ BBI.InNeeds |= StateExact;
+ Worklist.push_back(&MBB);
+ }
+ GlobalFlags |= StateExact;
+ III.Disabled = StateWQM | StateWWM;
+ continue;
} else {
if (Opcode == AMDGPU::SI_PS_LIVE) {
LiveMaskQueries.push_back(&MI);
@@ -326,6 +403,14 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
}
+ // Mark sure that any SET_INACTIVE instructions are computed in WQM if WQM is
+ // ever used anywhere in the function. This implements the corresponding
+ // semantics of @llvm.amdgcn.set.inactive.
+ if (GlobalFlags & StateWQM) {
+ for (MachineInstr *MI : SetInactiveInstrs)
+ markInstruction(*MI, StateWQM, Worklist);
+ }
+
return GlobalFlags;
}
@@ -337,22 +422,24 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
// Control flow-type instructions and stores to temporary memory that are
// followed by WQM computations must themselves be in WQM.
- if ((II.OutNeeds & StateWQM) && !II.Needs &&
+ if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;
}
// Propagate to block level
- BI.Needs |= II.Needs;
- if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
- BI.InNeeds |= II.Needs;
- Worklist.push_back(MBB);
+ if (II.Needs & StateWQM) {
+ BI.Needs |= StateWQM;
+ if (!(BI.InNeeds & StateWQM)) {
+ BI.InNeeds |= StateWQM;
+ Worklist.push_back(MBB);
+ }
}
// Propagate backwards within block
if (MachineInstr *PrevMI = MI.getPrevNode()) {
- char InNeeds = II.Needs | II.OutNeeds;
+ char InNeeds = (II.Needs & ~StateWWM) | II.OutNeeds;
if (!PrevMI->isPHI()) {
InstrInfo &PrevII = Instructions[PrevMI];
if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
@@ -363,10 +450,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
}
// Propagate WQM flag to instruction inputs
- assert(II.Needs != (StateWQM | StateExact));
+ assert(!(II.Needs & StateExact));
- if (II.Needs == StateWQM)
- markUsesWQM(MI, Worklist);
+ if (II.Needs != 0)
+ markInstructionUses(MI, II.Needs, Worklist);
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
@@ -558,6 +645,29 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
LIS->InsertMachineInstrInMaps(*MI);
}
+void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Before,
+ unsigned SaveOrig) {
+ MachineInstr *MI;
+
+ assert(SaveOrig);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+ SaveOrig)
+ .addImm(-1);
+ LIS->InsertMachineInstrInMaps(*MI);
+}
+
+void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Before,
+ unsigned SavedOrig) {
+ MachineInstr *MI;
+
+ assert(SavedOrig);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
+ .addReg(SavedOrig);
+ LIS->InsertMachineInstrInMaps(*MI);
+}
+
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
bool isEntry) {
auto BII = Blocks.find(&MBB);
@@ -566,45 +676,66 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
const BlockInfo &BI = BII->second;
- if (!(BI.InNeeds & StateWQM))
- return;
-
// This is a non-entry block that is WQM throughout, so no need to do
// anything.
- if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
+ if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
return;
- DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");
+ DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB) << ":\n");
unsigned SavedWQMReg = 0;
+ unsigned SavedNonWWMReg = 0;
bool WQMFromExec = isEntry;
- char State = isEntry ? StateExact : StateWQM;
+ char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
+ char NonWWMState = 0;
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (isEntry)
++II; // Skip the instruction that saves LiveMask
- MachineBasicBlock::iterator First = IE;
+ // This stores the first instruction where it's safe to switch from WQM to
+ // Exact or vice versa.
+ MachineBasicBlock::iterator FirstWQM = IE;
+
+ // This stores the first instruction where it's safe to switch from WWM to
+ // Exact/WQM or to switch to WWM. It must always be the same as, or after,
+ // FirstWQM since if it's safe to switch to/from WWM, it must be safe to
+ // switch to/from WQM as well.
+ MachineBasicBlock::iterator FirstWWM = IE;
for (;;) {
MachineBasicBlock::iterator Next = II;
- char Needs = 0;
+ char Needs = StateExact | StateWQM; // WWM is disabled by default
char OutNeeds = 0;
- if (First == IE)
- First = II;
+ if (FirstWQM == IE)
+ FirstWQM = II;
+
+ if (FirstWWM == IE)
+ FirstWWM = II;
+ // First, figure out the allowed states (Needs) based on the propagated
+ // flags.
if (II != IE) {
MachineInstr &MI = *II;
if (requiresCorrectState(MI)) {
auto III = Instructions.find(&MI);
if (III != Instructions.end()) {
- Needs = III->second.Needs;
+ if (III->second.Needs & StateWWM)
+ Needs = StateWWM;
+ else if (III->second.Needs & StateWQM)
+ Needs = StateWQM;
+ else
+ Needs &= ~III->second.Disabled;
OutNeeds = III->second.OutNeeds;
}
+ } else {
+ // If the instruction doesn't actually need a correct EXEC, then we can
+ // safely leave WWM enabled.
+ Needs = StateExact | StateWQM | StateWWM;
}
- if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
+ if (MI.isTerminator() && OutNeeds == StateExact)
Needs = StateExact;
if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
@@ -617,20 +748,45 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
Needs = StateWQM;
else if (BI.OutNeeds == StateExact)
Needs = StateExact;
+ else
+ Needs = StateWQM | StateExact;
}
- if (Needs) {
- if (Needs != State) {
- MachineBasicBlock::iterator Before =
- prepareInsertion(MBB, First, II, Needs == StateWQM,
- Needs == StateExact || WQMFromExec);
+ // Now, transition if necessary.
+ if (!(Needs & State)) {
+ MachineBasicBlock::iterator First;
+ if (State == StateWWM || Needs == StateWWM) {
+ // We must switch to or from WWM
+ First = FirstWWM;
+ } else {
+ // We only need to switch to/from WQM, so we can use FirstWQM
+ First = FirstWQM;
+ }
- if (Needs == StateExact) {
+ MachineBasicBlock::iterator Before =
+ prepareInsertion(MBB, First, II, Needs == StateWQM,
+ Needs == StateExact || WQMFromExec);
+
+ if (State == StateWWM) {
+ assert(SavedNonWWMReg);
+ fromWWM(MBB, Before, SavedNonWWMReg);
+ State = NonWWMState;
+ }
+
+ if (Needs == StateWWM) {
+ NonWWMState = State;
+ SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ toWWM(MBB, Before, SavedNonWWMReg);
+ State = StateWWM;
+ } else {
+ if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
if (!WQMFromExec && (OutNeeds & StateWQM))
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
- } else {
+ State = StateExact;
+ } else if (State == StateExact && (Needs & StateWQM) &&
+ !(Needs & StateExact)) {
assert(WQMFromExec == (SavedWQMReg == 0));
toWQM(MBB, Before, SavedWQMReg);
@@ -639,12 +795,19 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
LIS->createAndComputeVirtRegInterval(SavedWQMReg);
SavedWQMReg = 0;
}
+ State = StateWQM;
+ } else {
+ // We can get here if we transitioned from WWM to a non-WWM state that
+ // already matches our needs, but we shouldn't need to do anything.
+ assert(Needs & State);
}
-
- State = Needs;
}
+ }
- First = IE;
+ if (Needs != (StateExact | StateWQM | StateWWM)) {
+ if (Needs != (StateExact | StateWQM))
+ FirstWQM = IE;
+ FirstWWM = IE;
}
if (II == IE)
@@ -666,13 +829,20 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
}
}
-bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
- return false;
+void SIWholeQuadMode::lowerCopyInstrs() {
+ for (MachineInstr *MI : LowerToCopyInstrs) {
+ for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
+ MI->RemoveOperand(i);
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
+}
+bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Instructions.clear();
Blocks.clear();
LiveMaskQueries.clear();
+ LowerToCopyInstrs.clear();
+ CallingConv = MF.getFunction().getCallingConv();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -682,14 +852,13 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
+ unsigned LiveMaskReg = 0;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(AMDGPU::EXEC);
- return !LiveMaskQueries.empty();
- }
-
- // Store a copy of the original live mask when required
- unsigned LiveMaskReg = 0;
- {
+ if (!(GlobalFlags & StateWWM))
+ return !LiveMaskQueries.empty();
+ } else {
+ // Store a copy of the original live mask when required
MachineBasicBlock &Entry = MF.front();
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
@@ -701,13 +870,15 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LIS->InsertMachineInstrInMaps(*MI);
}
+ lowerLiveMaskQueries(LiveMaskReg);
+
if (GlobalFlags == StateWQM) {
// For a shader that needs only WQM, we can just set it once.
BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);
- lowerLiveMaskQueries(LiveMaskReg);
+ lowerCopyInstrs();
// EntryMI may become invalid here
return true;
}
@@ -715,7 +886,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
DEBUG(printInfo());
- lowerLiveMaskQueries(LiveMaskReg);
+ lowerCopyInstrs();
// Handle the general case
for (auto BII : Blocks)