summaryrefslogtreecommitdiffstats
path: root/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp70
1 files changed, 47 insertions, 23 deletions
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6b8d2566597..848be32cd51 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,19 +1,17 @@
-//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// \file
//===----------------------------------------------------------------------===//
-
#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -22,6 +20,11 @@
using namespace llvm;
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+ "amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling VGPRs to SGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}
@@ -48,6 +51,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
ReturnsVoid(true),
+ MaximumWorkGroupSize(0),
+ DebuggerReservedVGPRCount(0),
+ DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
+ DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
@@ -56,6 +63,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
HasSpilledVGPRs(false),
HasNonSpillStackObjects(false),
HasFlatInstructions(false),
+ NumSpilledSGPRs(0),
+ NumSpilledVGPRs(0),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -65,34 +74,37 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
GridWorkgroupCountX(false),
GridWorkgroupCountY(false),
GridWorkgroupCountZ(false),
- WorkGroupIDX(true),
+ WorkGroupIDX(false),
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(true),
+ WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- if (getShaderType() == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(F->getCallingConv())) {
KernargSegmentPtr = true;
+ WorkGroupIDX = true;
+ WorkItemIDX = true;
+ }
- if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
WorkGroupIDY = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
// X, XY, and XYZ are the only supported combinations, so make sure Y is
@@ -100,7 +112,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (WorkItemIDZ)
WorkItemIDY = true;
- bool MaySpill = ST.isVGPRSpillingEnabled(this);
+ bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo->hasStackObjects();
if (HasStackObjects || MaySpill)
@@ -112,14 +124,25 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
+
+ if (F->hasFnAttribute("amdgpu-queue-ptr"))
+ QueuePtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
+ if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
ST.isAmdHsaOS())
FlatScratchInit = true;
+
+ if (AMDGPU::isCompute(F->getCallingConv()))
+ MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
+ else
+ MaximumWorkGroupSize = ST.getWavefrontSize();
+
+ if (ST.debuggerReserveRegs())
+ DebuggerReservedVGPRCount = 4;
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -158,13 +181,17 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return FlatScratchInitUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
MachineFunction *MF,
unsigned FrameIndex,
unsigned SubIdx) {
+ if (!EnableSpillSGPRToVGPR)
+ return SpilledReg();
+
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
@@ -176,13 +203,13 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
Spill.Lane = Lane;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
+ unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
+ *MF);
if (LaneVGPR == AMDGPU::NoRegister)
// We have no VGPRs left for spilling SGPRs.
return Spill;
-
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
// Add this register as live-in to all blocks to avoid machine verifer
@@ -199,8 +226,5 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
const MachineFunction &MF) const {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- // FIXME: We should get this information from kernel attributes if it
- // is available.
- return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
+ return MaximumWorkGroupSize;
}