| field | value |
|---|---|
| author | 2018-04-06 14:26:03 +0000 |
| committer | 2018-04-06 14:26:03 +0000 |
| commit | bdabc2f19ffb9e20600dad6e8a300842a7bda50e (patch) |
| tree | c50e7b2e5449b074651bb82a58517a8ebc4a8cf7 /gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp |
| parent | Print a 'p' flag for file descriptors that were opened after pledge(2). (diff) |
Import LLVM 6.0.1 release including clang, lld and lldb.
"where is the kaboom?" deraadt@
Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 43 |

1 file changed, 30 insertions, 13 deletions
```diff
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0aad8f0843d..78318198034 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -14,7 +14,7 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -290,11 +290,11 @@ void SIFoldOperands::foldOperand(
     // copy since a subregister use tied to a full register def doesn't really
     // make sense. e.g. don't fold:
     //
-    // %vreg1 = COPY %vreg0:sub1
-    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
+    // %1 = COPY %0:sub1
+    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
     //
     // into
-    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
+    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
     if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
       return;
   }
@@ -628,7 +628,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
   MachineOperand *NonInlineUse = nullptr;
   int NonInlineUseOpNo = -1;
 
-  MachineRegisterInfo::use_iterator NextUse, NextInstUse;
+  MachineRegisterInfo::use_iterator NextUse;
   for (MachineRegisterInfo::use_iterator
          Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
        Use != E; Use = NextUse) {
@@ -723,12 +723,15 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
   }
 }
 
+// Clamp patterns are canonically selected to v_max_* instructions, so only
+// handle them.
 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
   unsigned Op = MI.getOpcode();
   switch (Op) {
   case AMDGPU::V_MAX_F32_e64:
   case AMDGPU::V_MAX_F16_e64:
-  case AMDGPU::V_MAX_F64: {
+  case AMDGPU::V_MAX_F64:
+  case AMDGPU::V_PK_MAX_F16: {
     if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
       return nullptr;
 
@@ -736,14 +739,24 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
     const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     if (!Src0->isReg() || !Src1->isReg() ||
+        Src0->getReg() != Src1->getReg() ||
         Src0->getSubReg() != Src1->getSubReg() ||
         Src0->getSubReg() != AMDGPU::NoSubRegister)
       return nullptr;
 
     // Can't fold up if we have modifiers.
-    if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
-        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
-        TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+      return nullptr;
+
+    unsigned Src0Mods
+      = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+    unsigned Src1Mods
+      = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
+
+    // Having a 0 op_sel_hi would require swizzling the output in the source
+    // instruction, which we can't do.
+    unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
+    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
       return nullptr;
 
     return Src0;
   }
@@ -765,14 +778,18 @@ static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
   return true;
 }
 
+// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
   const MachineOperand *ClampSrc = isClamp(MI);
   if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
     return false;
 
   MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
-  if (!TII->hasFPClamp(*Def))
+
+  // The type of clamp must be compatible.
+  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
     return false;
+
   MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
   if (!DefClamp)
     return false;
@@ -909,7 +926,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
 }
 
 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   MRI = &MF.getRegInfo();
@@ -954,9 +971,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
         // Prevent folding operands backwards in the function. For example,
         // the COPY opcode must not be replaced by 1 in this example:
         //
-        // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
+        // %3 = COPY %vgpr0; VGPR_32:%3
         // ...
-        // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
+        // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
         MachineOperand &Dst = MI.getOperand(0);
         if (Dst.isReg() &&
             !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
```
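The heart of this patch is the clamp folding in isClamp()/tryFoldClamp(): a v_max whose two sources are the same unmodified register and whose clamp bit is set computes clamp(x), so the clamp bit can instead be folded onto the instruction that defines x. Below is a minimal standalone sketch of that rewrite, not LLVM code: the Inst model, Opcode set, and hasFPClamp() stand-in are invented for illustration, and details such as NaN semantics, source modifiers, and the clamp-mask compatibility check are omitted.

```cpp
#include <cstddef>
#include <optional>
#include <vector>

// Toy instruction model; real MachineInstrs carry named operands,
// modifier bits, and register classes.
enum class Opcode { V_MAX_F32, V_PK_MAX_F16, V_ADD_F32, V_MOV_B32 };

struct Inst {
  Opcode Op;
  int Dst;            // virtual register defined by this instruction
  int Src0 = -1;      // source registers (-1 when absent)
  int Src1 = -1;
  bool Clamp = false; // output-clamp bit

  // Stand-in for SIInstrInfo::hasFPClamp(): can this def clamp its result?
  bool hasFPClamp() const {
    return Op == Opcode::V_ADD_F32 || Op == Opcode::V_MAX_F32;
  }
};

// Mirrors the shape of isClamp(): a v_max with the clamp bit set and
// identical, unmodified sources computes clamp(x); report which register
// is being clamped.
std::optional<int> isClamp(const Inst &MI) {
  if (MI.Op != Opcode::V_MAX_F32 && MI.Op != Opcode::V_PK_MAX_F16)
    return std::nullopt;
  if (!MI.Clamp || MI.Src0 < 0 || MI.Src0 != MI.Src1)
    return std::nullopt;
  return MI.Src0;
}

// Mirrors the shape of tryFoldClamp(): move the clamp bit onto the def of
// the clamped register and retarget that def at the max's result, leaving
// the max dead for a later cleanup.
bool tryFoldClamp(std::vector<Inst> &Prog, std::size_t MaxIdx) {
  std::optional<int> Src = isClamp(Prog[MaxIdx]);
  if (!Src)
    return false;

  // The real pass requires the clamped register to have exactly one
  // non-debug user (the max itself); approximate that check here.
  std::size_t UserInsts = 0;
  for (const Inst &I : Prog)
    if (I.Src0 == *Src || I.Src1 == *Src)
      ++UserInsts;
  if (UserInsts != 1)
    return false;

  for (std::size_t I = 0; I < MaxIdx; ++I) {
    Inst &Def = Prog[I];
    if (Def.Dst == *Src && Def.hasFPClamp()) {
      Def.Clamp = true;           // fold the clamp into the def
      Def.Dst = Prog[MaxIdx].Dst; // def now produces the clamped value
      return true;
    }
  }
  return false;
}
```

In this model, a V_ADD_F32 defining %0 followed by V_MAX_F32 %1 = max(%0, %0) with clamp set collapses into a single clamped add; the real pass performs the same rewrite on machine IR and leaves the dead v_max for dead-code elimination.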
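The new V_PK_MAX_F16 case also explains the unusual UnsetMods comparison: for packed f16 operations the modifier-free encoding has op_sel_hi set (SISrcMods::OP_SEL_1), because by default the high lane of the operation reads the high half of each source. A cleared op_sel_hi instead broadcasts a source's low half into the high lane, and undoing that would require swizzling the defining instruction's output. The toy evaluator below illustrates the effect; modeling a packed register as two floats and op_sel_hi as a bool is an invented simplification, not the VOP3P encoding.

```cpp
#include <algorithm>

// Two packed f16 lanes, modeled as floats for readability.
struct Packed { float Lo, Hi; };

// Toy v_pk_max_f16: each source's op_sel_hi picks which half feeds the
// high lane of the operation (true = high half, the default encoding).
Packed pkMaxF16(Packed A, bool AOpSelHi, Packed B, bool BOpSelHi, bool Clamp) {
  auto clamp01 = [](float V) { return std::min(std::max(V, 0.0f), 1.0f); };
  float AHi = AOpSelHi ? A.Hi : A.Lo;
  float BHi = BOpSelHi ? B.Hi : B.Lo;
  Packed R{std::max(A.Lo, B.Lo), std::max(AHi, BHi)};
  if (Clamp) {
    R.Lo = clamp01(R.Lo);
    R.Hi = clamp01(R.Hi);
  }
  return R;
}
```

With op_sel_hi set on both sources, pkMaxF16(X, true, X, true, true) is clamp(X) lane for lane, a pure clamp the pass can fold. With op_sel_hi clear, the high lane becomes clamp(X.Lo), a cross-lane shuffle the defining instruction cannot reproduce, which is why isClamp() only accepts the default modifier word for V_PK_MAX_F16.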
