Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  43 ++++++++++++++++++++++++++++++-------------
1 file changed, 30 insertions(+), 13 deletions(-)
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0aad8f0843d..78318198034 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -14,7 +14,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -290,11 +290,11 @@ void SIFoldOperands::foldOperand(
// copy since a subregister use tied to a full register def doesn't really
// make sense. e.g. don't fold:
//
- // %vreg1 = COPY %vreg0:sub1
- // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
+ // %1 = COPY %0:sub1
+ // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
//
// into
- // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
+ // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
return;
}
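
For context on the guard above: src2 of V_MAC_F16/F32 is tied to the full 32-bit vdst, so folding a sub-register source into it would tie a sub-register use to a full-register def. A minimal sketch of that guard, using hypothetical simplified stand-ins for MachineOperand (the in-tree check is the two-line test in the hunk above):

#include <cstdio>

// Simplified stand-in for MachineOperand; hypothetical, for illustration.
struct Operand {
  bool IsTied;     // tied to the instruction's def operand
  unsigned SubReg; // 0 means NoSubRegister, i.e. the full register
};

// Mirrors the guard above: refuse the fold when the use is tied and the
// value to fold carries a sub-register index.
static bool mustSkipFold(const Operand &UseOp, const Operand &OpToFold) {
  return UseOp.IsTied && OpToFold.SubReg != 0;
}

int main() {
  Operand Use{/*IsTied=*/true, /*SubReg=*/0};
  Operand Fold{/*IsTied=*/false, /*SubReg=*/1}; // e.g. sub1 of a 64-bit pair
  std::printf("skip fold: %d\n", mustSkipFold(Use, Fold)); // prints 1
  return 0;
}
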
@@ -628,7 +628,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand *NonInlineUse = nullptr;
int NonInlineUseOpNo = -1;
- MachineRegisterInfo::use_iterator NextUse, NextInstUse;
+ MachineRegisterInfo::use_iterator NextUse;
for (MachineRegisterInfo::use_iterator
Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
Use != E; Use = NextUse) {
@@ -723,12 +723,15 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
}
+// Clamp patterns are canonically selected to v_max_* instructions, so only
+// handle them.
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
unsigned Op = MI.getOpcode();
switch (Op) {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
- case AMDGPU::V_MAX_F64: {
+ case AMDGPU::V_MAX_F64:
+ case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
return nullptr;
@@ -736,14 +739,24 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (!Src0->isReg() || !Src1->isReg() ||
+ Src0->getReg() != Src1->getReg() ||
Src0->getSubReg() != Src1->getSubReg() ||
Src0->getSubReg() != AMDGPU::NoSubRegister)
return nullptr;
// Can't fold up if we have modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return nullptr;
+
+ unsigned Src0Mods
+ = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+ unsigned Src1Mods
+ = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
+
+ // Having a 0 op_sel_hi would require swizzling the output in the source
+ // instruction, which we can't do.
+ unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
+ if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
return nullptr;
return Src0;
}
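
The effect of this hunk is that a packed self-max with the clamp bit set now also counts as a clamp candidate, but only when each source carries the packed default modifiers. A minimal sketch of the modifier screen, assuming OP_SEL_1 is bit 3 as in SISrcMods (treat the constant as an assumption):

#include <cassert>
#include <cstdint>

// Assumed value of SISrcMods::OP_SEL_1; illustrative only.
constexpr uint32_t OP_SEL_1 = 1u << 3;

// For packed f16 ops the "no modifiers" encoding is OP_SEL_1 (each source's
// high half feeds the high half of the result), not 0. Mirrors the test
// above: the fold survives unless *both* sources deviate from that neutral
// encoding.
bool srcModsAllowClampFold(bool IsPackedMax, uint32_t Src0Mods,
                           uint32_t Src1Mods) {
  uint32_t UnsetMods = IsPackedMax ? OP_SEL_1 : 0u;
  return Src0Mods == UnsetMods || Src1Mods == UnsetMods;
}

int main() {
  // v_pk_max_f16 with default op_sel_hi on both sources: foldable.
  assert(srcModsAllowClampFold(true, OP_SEL_1, OP_SEL_1));
  // A zeroed op_sel_hi on both sources would need an output swizzle: not foldable.
  assert(!srcModsAllowClampFold(true, 0, 0));
  return 0;
}
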
@@ -765,14 +778,18 @@ static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
return true;
}
+// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
const MachineOperand *ClampSrc = isClamp(MI);
if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
return false;
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
- if (!TII->hasFPClamp(*Def))
+
+ // The type of clamp must be compatible.
+ if (TII->getClampMask(*Def) != TII->getClampMask(MI))
return false;
+
MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
if (!DefClamp)
return false;
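
Replacing hasFPClamp with a clamp-mask comparison is what keeps the packed case sound: a v_pk_max_f16 clamp may only fold into a def whose clamp bit saturates both f16 halves, and a scalar clamp only into a scalar def. A sketch under the assumption that getClampMask returns a small bitmask identifying the clamp flavor (the concrete encoding below is invented):

#include <cassert>

// Invented stand-ins for the TSFlags-derived clamp kinds; the real values
// live in SIInstrFlags and are an assumption here.
enum ClampKind : unsigned {
  NoClamp = 0,
  FPClamp = 1u << 0, // scalar f16/f32/f64 clamp
  PKClamp = 1u << 1, // packed v2f16 clamp, saturates both halves
};

// MI is already known to be one of the clamp max patterns, so its mask is
// non-zero; the fold is legal only when the def clamps the same way.
bool clampCompatible(unsigned DefMask, unsigned MaxMask) {
  return DefMask == MaxMask;
}

int main() {
  assert(clampCompatible(FPClamp, FPClamp));  // v_add_f32 def + v_max_f32 clamp
  assert(!clampCompatible(FPClamp, PKClamp)); // scalar def, packed clamp: no
  assert(!clampCompatible(NoClamp, FPClamp)); // def cannot clamp at all: no
  return 0;
}
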
@@ -909,7 +926,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
}
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
MRI = &MF.getRegInfo();
@@ -954,9 +971,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// Prevent folding operands backwards in the function. For example,
// the COPY opcode must not be replaced by 1 in this example:
//
- // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
+ // %3 = COPY %vgpr0; VGPR_32:%3
// ...
- // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
+ // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
MachineOperand &Dst = MI.getOperand(0);
if (Dst.isReg() &&
!TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
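
The guard closing this hunk exists because a physical register such as %vgpr0 can be redefined later in the function and its use list carries no program order, so folding the V_MOV's immediate into the earlier COPY would move the value backwards across its own definition. A minimal sketch of the screen, assuming LLVM's convention that virtual register numbers are stamped with the sign bit:

#include <cassert>
#include <cstdint>

// Assumes the LLVM convention that virtual registers carry the high bit;
// physical registers are small integers.
inline bool isVirtualReg(uint32_t Reg) { return (Reg & 0x80000000u) != 0; }

// Only a mov defining a virtual register is a safe fold source: virtual
// registers have a single def under SSA, so every use really sees that
// value. A physical def may be clobbered between its uses.
inline bool isSafeFoldDef(uint32_t DstReg) { return isVirtualReg(DstReg); }

int main() {
  const uint32_t Virt3 = 0x80000003u; // stand-in for %3
  const uint32_t VGPR0 = 42u;         // stand-in for physical %vgpr0
  assert(isSafeFoldDef(Virt3));
  assert(!isSafeFoldDef(VGPR0));
  return 0;
}
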